octomind 0.18.0

Session-based AI development assistant with conversational codebase interaction, multimodal vision support, built-in MCP tools, and multi-provider AI integration
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
// Copyright 2025 Muvon Un Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Response processing module - main orchestrator

pub mod tool_execution;
pub mod tool_result_processor;

use super::{CostTracker, MessageHandler, ToolProcessor};
use crate::config::Config;
use crate::log_debug;
use crate::providers::ThinkingBlock;
use crate::session::chat::assistant_output::print_assistant_response;
use crate::session::chat::display_thinking;
use crate::session::chat::get_animation_manager;
use crate::session::chat::session::ChatSession;
use crate::session::chat::session_continuation;
use crate::session::ProviderExchange;
use anyhow::Result;
use colored::Colorize;

use crate::session::output::{OutputMode, OutputSink};
use crate::websocket::{
	AssistantPayload, CostPayload, ServerMessage, ThinkingPayload, ToolResultPayload,
};

/// Bundled inputs for [`process_response`].
///
/// Groups the AI response payload, session/config borrows, cancellation signal,
/// and output sink so the orchestrator takes one parameter instead of a dozen.
pub struct ResponseProcessingParams<'a, S: OutputSink> {
	/// Assistant response text from the provider.
	pub content: String,
	/// Raw provider exchange (request/response) for logging and tool-call extraction.
	pub exchange: ProviderExchange,
	/// Structured tool calls parsed from the API response, if any.
	pub tool_calls: Option<Vec<crate::mcp::McpToolCall>>,
	/// Optional reasoning/thinking block attached to the response.
	pub thinking: Option<ThinkingBlock>,
	/// Provider finish reason (e.g. "stop", "tool_calls"); used for debug logging.
	pub finish_reason: Option<String>,
	/// Provider response id; required for conversation continuity (OpenAI Responses API).
	pub response_id: Option<String>,
	/// Mutable session state: messages, cost info, continuation flags.
	pub chat_session: &'a mut ChatSession,
	pub config: &'a Config,
	/// Role name used when rendering the assistant response.
	pub role: &'a str,
	/// Watch channel set to `true` when the user cancels the operation.
	pub operation_cancelled: tokio::sync::watch::Receiver<bool>,
	/// Sink for structured output (WebSocket/JSONL events).
	pub sink: S,
	/// Controls CLI rendering vs. structured-output suppression.
	pub mode: OutputMode,
}

impl<'a, S: OutputSink> ResponseProcessingParams<'a, S> {
	/// Construct params with defaults: no thinking block, `OutputMode::Interactive`.
	/// Use the `with_*` builders to override.
	#[allow(clippy::too_many_arguments)]
	pub fn new(
		content: String,
		exchange: ProviderExchange,
		tool_calls: Option<Vec<crate::mcp::McpToolCall>>,
		finish_reason: Option<String>,
		response_id: Option<String>,
		chat_session: &'a mut ChatSession,
		config: &'a Config,
		role: &'a str,
		operation_cancelled: tokio::sync::watch::Receiver<bool>,
		sink: S,
	) -> Self {
		Self {
			// Defaults that builders may override later.
			thinking: None,
			mode: OutputMode::Interactive,
			// Everything else passes straight through.
			content,
			exchange,
			tool_calls,
			finish_reason,
			response_id,
			chat_session,
			config,
			role,
			operation_cancelled,
			sink,
		}
	}

	/// Builder: attach (or clear) the thinking block.
	pub fn with_thinking(mut self, thinking: Option<ThinkingBlock>) -> Self {
		self.thinking = thinking;
		self
	}

	/// Builder: select the output mode (preferred over legacy interactive flags).
	pub fn with_mode(mut self, mode: OutputMode) -> Self {
		self.mode = mode;
		self
	}

	/// Forward a structured message to the output sink
	/// (used for streaming JSON output over WebSocket/JSONL).
	pub fn emit(&self, msg: ServerMessage) {
		self.sink.emit(msg);
	}
}

/// Send a thinking-block event through the structured output sink.
fn emit_thinking_event<S: OutputSink>(
	params: &ResponseProcessingParams<'_, S>,
	thinking: &ThinkingBlock,
	session_id: &str,
) {
	let payload = ThinkingPayload {
		content: thinking.content.clone(),
		session_id: session_id.to_owned(),
	};
	params.emit(ServerMessage::Thinking(payload));
}

/// Debug-log the finish reason and tool-call count of a provider response.
fn log_response_debug(
	_config: &Config,
	finish_reason: &Option<String>,
	tool_calls: &Option<Vec<crate::mcp::McpToolCall>>,
) {
	if let Some(reason) = finish_reason.as_ref() {
		log_debug!("Processing response with finish_reason: {}", reason);
	}
	if let Some(calls) = tool_calls.as_ref() {
		log_debug!("Processing {} tool calls", calls.len());
	}
}

/// Record and render the final assistant response when no tool calls remain.
///
/// Pushes the assistant message (with `response_id` preserved for conversation
/// continuity), updates `last_response`, and prints to the terminal depending
/// on `mode`. Cost/token tracking is intentionally NOT done here — it was
/// already handled by `CostTracker::track_exchange_cost()` upstream, and doing
/// it again would double-count.
#[allow(clippy::too_many_arguments)]
fn handle_final_response(
	content: &str,
	thinking: &Option<ThinkingBlock>,
	response_id: Option<String>,
	chat_session: &mut ChatSession,
	config: &Config,
	role: &str,
	mode: OutputMode,
) -> Result<()> {
	// Surface the thinking block first — interactive mode only, to avoid clutter.
	if mode.is_interactive() {
		if let Some(thinking_block) = thinking.as_ref() {
			display_thinking(thinking_block);
		}
	}

	let now_secs = std::time::SystemTime::now()
		.duration_since(std::time::UNIX_EPOCH)
		.unwrap_or_default()
		.as_secs();

	// Persist the assistant turn. The `id` field carries the provider
	// response_id, which the OpenAI Responses API needs to link conversation state.
	chat_session.session.messages.push(crate::session::Message {
		role: "assistant".to_string(),
		content: content.to_string(),
		timestamp: now_secs,
		cached: false,
		tool_call_id: None,
		name: None,
		tool_calls: None,
		images: None,
		videos: None,
		thinking: None,
		id: response_id,
	});
	chat_session.last_response = content.to_string();

	// Always show the AI's response in terminal modes (interactive or not).
	// Structured sinks (JSONL/WebSocket) render it through their own channel,
	// and `mode` only governs animations/prompts — not response visibility.
	if mode.is_terminal_mode() {
		print_assistant_response(content, config, role, thinking);
	}

	// Cost line: only for non-interactive terminal runs. Interactive mode shows
	// it elsewhere; printing here would duplicate it before the user prompt.
	use std::io::IsTerminal;
	if !std::io::stdin().is_terminal() && mode.is_terminal_mode() {
		CostTracker::display_cost_line(chat_session);
	}

	Ok(())
}

/// Resolve the MCP server name owning `tool_name`.
///
/// STATIC ONLY: consults the pre-built static tool map (same lookup the
/// execution path uses); falls back to "unknown" for unmapped tools.
/// Kept `async` to match call sites that await it alongside execution.
pub async fn get_tool_server_name_async(tool_name: &str, _config: &Config) -> String {
	match crate::mcp::tool_map::get_tool_server_name(tool_name) {
		Some(server) => server,
		None => "unknown".to_string(),
	}
}

// Display execution intent with headers upfront (before execution).
// Prints one "── name | server ──…" banner per pending tool call, optionally
// followed by its parameters (info/debug log levels), with blank-line spacing
// between tools and a trailing blank line before execution output begins.
async fn display_tool_parameters_only(config: &Config, tool_calls: &[crate::mcp::McpToolCall]) {
	if !tool_calls.is_empty() {
		// Always log debug info if debug enabled
		log_debug!("Found {} tool calls in response", tool_calls.len());

		let is_single_tool = tool_calls.len() == 1;

		// Show headers upfront - with indices for multiple tools, without for single tool
		for (index, call) in tool_calls.iter().enumerate() {
			// 1-based index for human-readable "[n]" prefixes
			let tool_index = index + 1;

			// Get server name using same logic as execution
			let server_name = get_tool_server_name_async(&call.tool_name, config).await;

			// Create formatted header - with or without index based on tool count
			let title = if is_single_tool {
				format!(
					" {} | {} ",
					call.tool_name.bright_cyan(),
					server_name.bright_blue()
				)
			} else {
				format!(
					" [{}] {} | {} ",
					tool_index,
					call.tool_name.bright_cyan(),
					server_name.bright_blue()
				)
			};
			// Pad the banner to at least 70 chars.
			// NOTE(review): title.len() counts bytes including the ANSI color
			// escape sequences added by bright_cyan()/bright_blue(), so the
			// visible banner width is shorter than computed — confirm intended.
			let separator_length = 70.max(title.len() + 4);
			let dashes = "─".repeat(separator_length - title.len());
			let separator = format!("──{}{}──", title, dashes.dimmed());
			println!("{}", separator);

			// Show parameters based on log level
			if config.get_log_level().is_debug_enabled() || config.get_log_level().is_info_enabled()
			{
				display_tool_parameters_full(call, config);
			}

			// Add spacing between tools (except for the last one)
			if index < tool_calls.len() - 1 {
				println!();
			}
		}

		// Add final spacing before execution starts
		println!();
	}
}

// Display tool parameters in full detail (for info/debug modes).
// Thin public wrapper kept for call sites in this module; the actual
// rendering lives in the shared tool_display implementation.
pub fn display_tool_parameters_full(tool_call: &crate::mcp::McpToolCall, config: &Config) {
	// Delegate to shared implementation
	crate::session::chat::tool_display::display_tool_parameters_full(tool_call, config);
}

/// Resolve the tool calls for the current loop iteration.
///
/// The API-provided tool calls are consumed at most once (via `take`); on
/// later iterations — or when the API supplied an empty list — fall back to
/// parsing tool calls out of the response content itself.
fn resolve_tool_calls(
	current_tool_calls_param: &mut Option<Vec<crate::mcp::McpToolCall>>,
	current_content: &str,
) -> Vec<crate::mcp::McpToolCall> {
	match current_tool_calls_param.take() {
		// First iteration with non-empty API tool calls: use them directly.
		Some(calls) if !calls.is_empty() => calls,
		// Empty list or already consumed: parse from content instead.
		_ => crate::mcp::parse_tool_calls(current_content),
	}
}

/// Return an error if the user has requested cancellation via the watch channel.
fn check_cancellation(operation_cancelled: &tokio::sync::watch::Receiver<bool>) -> Result<()> {
	let cancelled = *operation_cancelled.borrow();
	if !cancelled {
		return Ok(());
	}
	crate::log_debug!("Operation cancelled by user.");
	Err(anyhow::anyhow!("Operation cancelled"))
}

/// Append an assistant message to the session with its tool_calls PRESERVED.
///
/// The standard add_assistant_message path stores only text content; here we
/// must keep the provider's original tool_calls so the follow-up conversation
/// (tool results referencing tool_call ids) stays well-formed. Also logs the
/// response and raw exchange. Cost/token tracking is intentionally NOT done
/// here — `CostTracker::track_exchange_cost()` already handled it upstream,
/// and tracking again would double-count.
fn add_assistant_message_with_tool_calls(
	chat_session: &mut ChatSession,
	current_content: &str,
	current_exchange: &ProviderExchange,
	response_id: Option<String>,
	thinking: &Option<ThinkingBlock>,
	_config: &Config,
	_role: &str,
) -> Result<()> {
	// Extract the original tool_calls from the exchange response based on provider
	let original_tool_calls = MessageHandler::extract_original_tool_calls(current_exchange);

	// Serialize the thinking block (if any) so it survives in session storage.
	let thinking_value = thinking
		.as_ref()
		.and_then(|block| serde_json::to_value(block).ok());

	// Create the assistant message directly with tool_calls preserved from the exchange.
	// FIX: moved `original_tool_calls` and `response_id` in directly — both were
	// previously cloned despite being consumed exactly once (redundant allocations).
	let assistant_message = crate::session::Message {
		role: "assistant".to_string(),
		content: current_content.to_string(),
		timestamp: std::time::SystemTime::now()
			.duration_since(std::time::UNIX_EPOCH)
			.unwrap_or_default()
			.as_secs(),
		cached: false,
		tool_call_id: None,
		name: None,
		tool_calls: original_tool_calls,
		images: None,
		videos: None,
		thinking: thinking_value,
		id: response_id,
	};

	// Add the assistant message to the session
	chat_session.session.messages.push(assistant_message);

	// Update last response - no cost tracking here as it will be handled by follow-up processing
	chat_session.last_response = current_content.to_string();

	// Log the assistant response and exchange (best-effort; errors ignored on purpose)
	let _ = crate::session::logger::log_assistant_response(
		&chat_session.session.info.name,
		current_content,
	);
	let _ = crate::session::logger::log_raw_exchange(current_exchange);

	Ok(())
}

/// Main orchestrator: process an AI response, executing tool calls in a loop
/// until no more remain, then record/render the final assistant message.
///
/// High-level flow:
/// 1. Cancellation check, stop spinner animation, debug logging.
/// 2. If a continuation is pending, hand off immediately (skipping tools).
/// 3. Tool loop: resolve tool calls → display → execute in parallel → emit
///    results via the sink → persist the assistant message with tool_calls →
///    feed results back through a follow-up API call; repeat while new tool
///    calls appear. Cancellation is re-checked at each hazard point.
/// 4. If continuation became pending during tools, return early (the session
///    runner handles it); otherwise emit final assistant/cost events and call
///    `handle_final_response`.
pub async fn process_response<S: OutputSink>(
	params: ResponseProcessingParams<'_, S>,
) -> Result<()> {
	// Check if operation has been cancelled at the very start
	check_cancellation(&params.operation_cancelled)?;

	// CRITICAL FIX: Stop animation BEFORE any output to prevent ghost text
	// The animation must be stopped before printing assistant response to avoid
	// the spinner text remaining visible on the terminal after the response is printed
	get_animation_manager().stop_current().await;

	// Debug logging for finish_reason and tool calls
	log_response_debug(params.config, &params.finish_reason, &params.tool_calls);

	// CONTINUATION FIX: Removed early continuation check that was causing tool_calls/tool_result mismatch
	// Continuation is now handled AFTER tool processing completes to ensure conversation integrity

	// CRITICAL FIX: Check for continuation BEFORE any tool processing
	// LOGIC: If continuation_pending=true, skip ALL tool processing and handle continuation immediately
	// FLOW: Token limit → inject_summary_request() → continuation_pending=true → AI responds with summary
	//       → Check continuation_pending FIRST → If true: skip tools, process continuation immediately
	//       → If false: continue normal tool processing
	// IMPORTANT: This prevents tool calls in summary responses from interfering with continuation
	let has_tool_calls = params
		.tool_calls
		.as_ref()
		.is_some_and(|calls| !calls.is_empty());

	// Check if continuation is pending - if so, handle it immediately and skip tools
	if params.chat_session.continuation_pending
		&& session_continuation::process_continuation_response(
			params.chat_session,
			&params.content,
			has_tool_calls,
			params.config,
			params.role,
		)
		.await?
	{
		// Continuation was processed - handle it immediately with the summary response
		return process_continuation_message_immediately(params).await;
	}

	// Sanity check: by this point the user message should already be in the session.
	// If it isn't, warn (terminal modes only) — `params.content` here is the AI
	// response, not user input, so we cannot repair the session from it.
	let last_message = params.chat_session.session.messages.last();
	if params.mode.is_terminal_mode() && last_message.is_none_or(|msg| msg.role != "user") {
		// This is an edge case - the content variable here is the AI response, not user input
		// We should have added the user message earlier in the main run_interactive_session
		println!(
			"{}",
			"Warning: User message not found in session. This is unexpected.".yellow()
		);
	}

	// Initialize tool processor
	let mut tool_processor = ToolProcessor::new();

	// Track if thinking has been displayed (to avoid displaying twice)
	let mut thinking_displayed = false;

	// Process original content first, then any follow-up tool calls.
	// These "current_*" locals are rebound each loop iteration with the
	// follow-up API response, so the loop always operates on the latest turn.
	let mut current_content = params.content.clone();
	let mut current_exchange = params.exchange.clone(); // Clone to avoid moving params
	let mut current_tool_calls_param = params.tool_calls.clone(); // Track of tool_calls parameter
	let mut current_response_id = params.response_id.clone(); // Track response_id through iterations
	let mut current_thinking = params.thinking.clone(); // Track thinking only for the current response
	let mut last_emitted_thinking: Option<String> = None; // dedupe guard for sink thinking events
	let operation_cancelled_ref = &params.operation_cancelled; // Create a reference to avoid moves

	loop {
		// Check for cancellation at the start of each loop iteration
		check_cancellation(operation_cancelled_ref)?;

		// Check for tool calls if MCP has any servers configured
		if !params.config.mcp.servers.is_empty() {
			// Resolve current tool calls for this iteration
			let current_tool_calls =
				resolve_tool_calls(&mut current_tool_calls_param, &current_content);

			if !current_tool_calls.is_empty() {
				let session_id = params.chat_session.session.info.name.clone();
				// Structured output: emit the thinking block once per distinct content
				if params.mode.should_suppress_cli_output() {
					if let Some(ref thinking_block) = current_thinking {
						if last_emitted_thinking.as_deref() != Some(thinking_block.content.as_str())
						{
							emit_thinking_event(&params, thinking_block, &session_id);
							last_emitted_thinking = Some(thinking_block.content.clone());
						}
					}
				}

				// Display thinking first if present and not yet displayed - ONLY in interactive mode
				if params.mode.is_interactive() && !thinking_displayed {
					if let Some(ref thinking_block) = current_thinking {
						display_thinking(thinking_block);
						thinking_displayed = true;
					}
				}

				// Display the content to the user FIRST (before adding to session) - ONLY in interactive mode
				// Skip if using structured output (JSONL/WebSocket - handled by sink)
				if params.mode.is_interactive() {
					print_assistant_response(
						&current_content,
						params.config,
						params.role,
						&current_thinking,
					);
				}

				// Display tool parameters upfront (headers will be shown per-tool during execution) - ONLY in interactive mode
				if params.mode.is_interactive() {
					display_tool_parameters_only(params.config, &current_tool_calls).await;
				}

				// Clone operation_cancelled to avoid borrow issues
				let operation_cancelled_clone = params.operation_cancelled.clone();

				// Early exit if cancellation was requested BEFORE adding message
				if *operation_cancelled_clone.borrow() {
					crate::log_debug!("Operation cancelled by user.");
					// Do NOT add any message to the session since tools weren't executed
					return Ok(());
				}

				// Execute all tool calls in parallel using the new module
				let (tool_results, total_tool_time_ms) =
					match tool_execution::execute_tools_parallel(
						current_tool_calls.clone(),
						params.chat_session,
						params.config,
						&mut tool_processor,
						operation_cancelled_clone.clone(),
						params.mode,
					)
					.await
					{
						Ok(results) => results,
						Err(e) => {
							// Check if this was a cancellation
							if e.to_string().contains("cancelled")
								|| *operation_cancelled_clone.borrow()
							{
								crate::log_debug!("Operation cancelled by user.");
								// Don't add assistant message since tools weren't executed
								return Ok(());
							}
							return Err(e);
						}
					};

				// Emit tool results through sink (WebSocket/JSONL)
				let session_id = params.chat_session.session.info.name.clone();
				for tool_result in &tool_results {
					let actual_content = crate::mcp::extract_mcp_content(&tool_result.result);
					// "isError: true" in the MCP result marks a failed tool call
					let success = !tool_result
						.result
						.get("isError")
						.and_then(|v| v.as_bool())
						.unwrap_or(false);
					let tool_msg = ServerMessage::ToolResult(ToolResultPayload {
						tool: tool_result.tool_name.clone(),
						tool_id: tool_result.tool_id.clone(),
						server: crate::session::chat::response::get_tool_server_name_async(
							&tool_result.tool_name,
							params.config,
						)
						.await,
						content: actual_content,
						success,
						session_id: session_id.clone(),
					});
					params.emit(tool_msg);
				}

				// Check for cancellation BEFORE adding assistant message
				if *operation_cancelled_clone.borrow() {
					if params.mode.is_terminal_mode() {
						println!("{}", "\nTool execution cancelled.".bright_yellow());
					}
					// Don't add assistant message since tools were cancelled
					return Ok(());
				}

				// ONLY add assistant message if tools were NOT cancelled
				add_assistant_message_with_tool_calls(
					params.chat_session,
					&current_content,
					&current_exchange,
					current_response_id.clone(), // CRITICAL FIX: Use current_response_id from loop, not params.response_id
					&current_thinking,
					params.config,
					params.role,
				)?;

				// Process tool results if any exist
				if !tool_results.is_empty() {
					// Process tool results and handle follow-up API calls using the new module
					if let Some((
						new_content,
						new_exchange,
						new_tool_calls,
						new_response_id,
						new_thinking,
					)) = tool_result_processor::process_tool_results(
						tool_results,
						total_tool_time_ms,
						params.chat_session,
						params.config,
						params.role,
						operation_cancelled_clone.clone(),
					)
					.await?
					{
						// Update current content for next iteration
						current_content = new_content;
						current_exchange = new_exchange;
						current_tool_calls_param = new_tool_calls;
						current_response_id = new_response_id; // Update response_id from follow-up response
						// CRITICAL FIX: Preserve thinking from follow-up response for Moonshot
						// Moonshot requires reasoning_content for ALL assistant messages with tool calls
						current_thinking = new_thinking;

						// Check if there are more tools to process
						if current_tool_calls_param.is_some()
							&& !current_tool_calls_param.as_ref().unwrap().is_empty()
						{
							// Continue processing the new content with tool calls
							continue;
						} else {
							// Check if there are more tool calls in the content itself
							let more_tools = crate::mcp::parse_tool_calls(&current_content);
							if !more_tools.is_empty() {
								// Log if debug mode is enabled
								log_debug!(
									"Found {} more tool calls to process in content",
									more_tools.len()
								);
								continue;
							} else {
								// No more tool calls, break out of the loop
								break;
							}
						}
					} else {
						// No follow-up response - check if this was due to continuation being triggered
						if params.chat_session.continuation_pending {
							log_debug!("Tool processing stopped due to continuation trigger - breaking out of tool loop to handle continuation");
							// Break out of tool processing loop to let the main continuation check handle it
							break;
						}
						// No follow-up response (cancelled or error), exit
						return Ok(());
					}
				} else {
					// No tool results - check if there were more tools to execute directly
					let more_tools = crate::mcp::parse_tool_calls(&current_content);
					if !more_tools.is_empty() {
						// Log if debug mode is enabled
						log_debug!(
							"Found {} more tool calls to process (no previous tool results)",
							more_tools.len()
						);
						// If there are more tool calls later in the response, continue processing
						continue;
					} else {
						// No more tool calls, exit the loop
						break;
					}
				}
			} else {
				// No tool calls in this content, break out of the loop
				break;
			}
		} else {
			// MCP not enabled, break out of the loop
			break;
		}
	}

	// CRITICAL FIX: Check for continuation after tool processing loop
	// When continuation is triggered during tool execution, we broke out of the tool loop (line 430)
	// The main session runner will detect continuation_pending and handle it automatically (runner.rs:984-988)
	// We just need to skip final response processing and return cleanly
	if params.chat_session.continuation_pending {
		log_debug!("Continuation pending after tool processing - skipping final response, main session loop will handle continuation");
		// DO NOT process final response when continuation is pending
		// The main session loop will detect continuation_pending and continue to process the summary request
		return Ok(());
	}

	// Handle final response using the helper function (only when no continuation is pending and no tool calls)
	// When tool calls are present, we already created an assistant message with add_assistant_message_with_tool_calls
	// Calling handle_final_response would create a duplicate assistant message without id
	// Pass thinking only if it hasn't been displayed yet (in tool call loop)
	let session_id = params.chat_session.session.info.name.clone();
	let thinking_for_final = if thinking_displayed {
		None
	} else {
		current_thinking.clone()
	};
	if params.mode.should_suppress_cli_output() {
		if let Some(ref thinking_block) = thinking_for_final {
			if last_emitted_thinking.as_deref() != Some(thinking_block.content.as_str()) {
				emit_thinking_event(&params, thinking_block, &session_id);
			}
		}
	}

	// Emit assistant message through sink (WebSocket/JSONL)
	params.emit(ServerMessage::Assistant(AssistantPayload {
		content: current_content.clone(),
		session_id: session_id.clone(),
	}));

	handle_final_response(
		&current_content,
		&thinking_for_final,
		current_response_id, // Use current_response_id (updated from follow-up responses)
		params.chat_session,
		params.config,
		params.role,
		params.mode,
	)?;

	// Emit cost message through sink (WebSocket/JSONL)
	// Total = all token categories tracked on session info
	let total_tokens = params.chat_session.session.info.input_tokens
		+ params.chat_session.session.info.output_tokens
		+ params.chat_session.session.info.cache_read_tokens
		+ params.chat_session.session.info.cache_write_tokens
		+ params.chat_session.session.info.reasoning_tokens;
	let cost_msg = ServerMessage::Cost(CostPayload {
		session_tokens: total_tokens,
		session_cost: params.chat_session.session.info.total_cost,
		input_tokens: params.chat_session.session.info.input_tokens,
		output_tokens: params.chat_session.session.info.output_tokens,
		cache_read_tokens: params.chat_session.session.info.cache_read_tokens,
		cache_write_tokens: params.chat_session.session.info.cache_write_tokens,
		reasoning_tokens: params.chat_session.session.info.reasoning_tokens,
		session_id,
	});

	params.emit(cost_msg);

	// Inject compression hint if applicable (non-intrusive, appended to response)
	// Skip if using structured output (JSONL/WebSocket - handled by sink)
	if params.mode.is_terminal_mode() {
		if let Some(hint) = params.chat_session.get_compression_hint(params.config) {
			println!("{}", hint);
		}
	}
	Ok(())
}

/// Process continuation message immediately after session reset.
/// This makes the continuation completely invisible to the user.
///
/// Expects the session's last message to be the injected "user" continuation
/// message; fires a fresh (non-retrying) API call with it and recursively
/// processes the response via [`process_response`]. On API failure the
/// `continuation_pending` flag is reset to break potential retry loops, and
/// the error is propagated to the session runner.
pub async fn process_continuation_message_immediately<S: OutputSink>(
	params: ResponseProcessingParams<'_, S>,
) -> Result<()> {
	use crate::session::ChatCompletionWithValidationParams;
	use crate::{log_debug, log_info};

	log_info!("Processing continuation message automatically...");

	// Get the last message which should be our continuation message
	let continuation_message = params
		.chat_session
		.session
		.messages
		.last()
		.ok_or_else(|| anyhow::anyhow!("No continuation message found"))?;

	// Invariant: the continuation injection appends a user-role message.
	if continuation_message.role != "user" {
		return Err(anyhow::anyhow!(
			"Expected user continuation message, found: {}",
			continuation_message.role
		));
	}

	// Clone messages to avoid borrowing conflicts
	let messages = params.chat_session.session.messages.clone();

	// Prepare API call parameters for continuation using the session's current settings
	let chat_params = ChatCompletionWithValidationParams::new(
		&messages,
		&params.chat_session.model,
		params.chat_session.temperature,
		params.chat_session.top_p,
		params.chat_session.top_k,
		params.chat_session.max_tokens,
		params.config,
	)
	.with_max_retries(params.chat_session.max_retries)
	.with_cancellation_token(params.operation_cancelled.clone())
	.as_continuation_call(); // CRITICAL FIX: Mark as continuation call to prevent infinite retry loops

	// Make API call with continuation message
	match crate::session::chat_completion_with_validation(chat_params).await {
		Ok(response) => {
			log_debug!("Continuation API call successful");

			// Process the continuation response recursively using Box::pin to avoid stack overflow
			let continuation_params = ResponseProcessingParams::new(
				response.content,
				response.exchange,
				response.tool_calls,
				response.finish_reason,
				response.response_id,
				params.chat_session,
				params.config,
				params.role,
				params.operation_cancelled.clone(),
				params.sink.clone(), // Clone the sink
			)
			.with_mode(params.mode); // Preserve output mode

			// Use Box::pin to avoid recursion compilation issues
			Box::pin(process_response(continuation_params)).await
		}
		Err(e) => {
			// CRITICAL FIX: Reset continuation state when API call fails after exhausting retries
			// This prevents infinite retry loops when rate limits are hit
			log_info!(
				"Continuation API call failed after exhausting retries: {}",
				e
			);

			// Reset continuation state to prevent infinite loop
			params.chat_session.continuation_pending = false;
			log_debug!("Continuation state reset due to API failure - breaking continuation loop");

			// Return the error to properly propagate it up the chain
			// This will cause the session runner to handle the error appropriately
			Err(e)
		}
	}
}