codetether_agent/session/helper/
prompt.rs

1//! Core agentic prompt loop used by [`Session::prompt`](super::super::Session::prompt).
2//!
3//! This is the non-streaming, non-event-emitting variant. It loads the
4//! provider registry from Vault, runs the completion/tool-call loop up to
5//! `max_steps` times, and returns the final plain-text answer.
6//!
7//! The code here was extracted from `src/session/mod.rs` verbatim to keep
8//! the Session facade small and keep the loop's behavior stable. Refinements
9//! to the loop should be made here (and mirrored in [`prompt_events`](super::prompt_events)
10//! where appropriate).
11// TODO: Keep this loop in sync with `prompt_events.rs` until both prompt
12// loops are consolidated into one shared implementation.
13
14use std::collections::HashSet;
15use std::sync::Arc;
16
17use anyhow::Result;
18use chrono::Utc;
19use serde_json::json;
20use uuid::Uuid;
21
22use crate::audit::{AuditCategory, AuditOutcome, try_audit_log};
23use crate::cognition::tool_router::{ToolCallRouter, ToolRouterConfig};
24use crate::event_stream::ChatEvent;
25use crate::provider::{
26    CompletionRequest, ContentPart, Message, ProviderRegistry, Role, parse_model_string,
27};
28use crate::rlm::router::AutoProcessContext;
29use crate::rlm::{RlmConfig, RlmRouter, RoutingContext};
30use crate::tool::ToolRegistry;
31
32use super::super::{DEFAULT_MAX_STEPS, Session, SessionResult};
33use super::bootstrap::{
34    inject_tool_prompt, list_tools_bootstrap_definition, list_tools_bootstrap_output,
35};
36use super::build::{
37    build_request_requires_tool, is_build_agent, should_force_build_tool_first_retry,
38};
39use super::confirmation::{
40    auto_apply_pending_confirmation, pending_confirmation_tool_result_content,
41    tool_result_requires_confirmation,
42};
43use super::defaults::default_model_for_provider;
44use super::edit::{detect_stub_in_tool_input, normalize_tool_call_for_execution};
45use super::error::{is_prompt_too_long_error, is_retryable_upstream_error};
46use super::loop_constants::{
47    BUILD_MODE_TOOL_FIRST_MAX_RETRIES, BUILD_MODE_TOOL_FIRST_NUDGE, CODESEARCH_THRASH_NUDGE,
48    FORCE_FINAL_ANSWER_NUDGE, MAX_CONSECUTIVE_CODESEARCH_NO_MATCHES, MAX_CONSECUTIVE_SAME_TOOL,
49    NATIVE_TOOL_PROMISE_NUDGE, NATIVE_TOOL_PROMISE_RETRY_MAX_RETRIES,
50    POST_EDIT_VALIDATION_MAX_RETRIES,
51};
52use super::markup::normalize_textual_tool_calls;
53use super::provider::{
54    prefers_temperature_one, resolve_provider_for_session_request,
55    should_retry_missing_native_tool_call, temperature_is_deprecated,
56};
57use super::router::{build_proactive_lsp_context_message, choose_router_target};
58use super::runtime::{
59    enrich_tool_input_with_runtime_context, is_codesearch_no_match_output, is_interactive_tool,
60    is_local_cuda_provider, local_cuda_light_system_prompt,
61};
62use super::text::extract_text_content;
63use super::token::{
64    context_window_for_model, estimate_tokens_for_messages, session_completion_max_tokens,
65};
66use super::validation::{build_validation_report, capture_git_dirty_files, track_touched_files};
67use crate::session::context::derive_context;
68
69/// Execute a prompt against the session and return a [`SessionResult`].
70///
71/// See [`Session::prompt`](super::super::Session::prompt) for the public-facing contract.
72pub(crate) async fn run_prompt(session: &mut Session, message: &str) -> Result<SessionResult> {
73    let registry = ProviderRegistry::from_vault().await?;
74    session.resolve_subcall_provider(&registry);
75
76    let providers = registry.list();
77    if providers.is_empty() {
78        anyhow::bail!(
79            "No providers available. Configure provider credentials in HashiCorp Vault (for ChatGPT subscription Codex use `codetether auth codex`; for Copilot use `codetether auth copilot`)."
80        );
81    }
82
83    tracing::info!("Available providers: {:?}", providers);
84
85    let (provider_name, model_id) = parse_session_model_selector(session, &providers);
86
87    let selected_provider =
88        resolve_provider_for_session_request(providers.as_slice(), provider_name.as_deref())?;
89
90    let provider = registry
91        .get(selected_provider)
92        .ok_or_else(|| anyhow::anyhow!("Provider {} not found", selected_provider))?;
93
94    session.add_message(Message {
95        role: Role::User,
96        content: vec![ContentPart::Text {
97            text: message.to_string(),
98        }],
99    });
100
101    if session.title.is_none() {
102        session.generate_title().await?;
103    }
104
105    let model = if !model_id.is_empty() {
106        model_id
107    } else {
108        default_model_for_provider(selected_provider)
109    };
110
111    // Phase A: oversized-user-message compression now happens inside
112    // `derive_context` on a clone. Keeping the original text in
113    // `session.messages` means `session_recall` and the MinIO history
114    // sink can still see what the user actually typed.
115
116    let tool_registry = ToolRegistry::with_provider_arc(Arc::clone(&provider), model.clone());
117    let tool_definitions: Vec<_> = tool_registry
118        .definitions()
119        .into_iter()
120        .filter(|tool| !is_interactive_tool(&tool.name))
121        .collect();
122
123    let temperature = if temperature_is_deprecated(&model) {
124        None
125    } else if prefers_temperature_one(&model) {
126        Some(1.0)
127    } else {
128        Some(0.7)
129    };
130
131    tracing::info!("Using model: {} via provider: {}", model, selected_provider);
132    tracing::info!("Available tools: {}", tool_definitions.len());
133
134    let model_supports_tools = !matches!(
135        selected_provider,
136        "gemini-web" | "local-cuda" | "local_cuda" | "localcuda"
137    );
138    let advertised_tool_definitions = if model_supports_tools {
139        tool_definitions.clone()
140    } else {
141        vec![list_tools_bootstrap_definition()]
142    };
143
144    let cwd = session
145        .metadata
146        .directory
147        .clone()
148        .unwrap_or_else(|| std::env::current_dir().unwrap_or_default());
149    let system_prompt = if is_local_cuda_provider(selected_provider) {
150        local_cuda_light_system_prompt()
151    } else {
152        crate::agent::builtin::build_system_prompt(&cwd)
153    };
154    let system_prompt = if !model_supports_tools && !advertised_tool_definitions.is_empty() {
155        inject_tool_prompt(&system_prompt, &advertised_tool_definitions)
156    } else {
157        system_prompt
158    };
159
160    let max_steps = session.max_steps.unwrap_or(DEFAULT_MAX_STEPS);
161    let mut final_output = String::new();
162    let baseline_git_dirty_files = capture_git_dirty_files(&cwd).await;
163    let mut touched_files = HashSet::new();
164    let mut validation_retry_count: u8 = 0;
165
166    let mut last_tool_sig: Option<String> = None;
167    let mut consecutive_same_tool: u32 = 0;
168    let mut consecutive_codesearch_no_matches: u32 = 0;
169    let mut build_mode_tool_retry_count: u8 = 0;
170    let mut native_tool_promise_retry_count: u8 = 0;
171    let turn_id = Uuid::new_v4().to_string();
172
173    let tool_router: Option<ToolCallRouter> = {
174        let cfg = ToolRouterConfig::from_env();
175        match ToolCallRouter::from_config(&cfg) {
176            Ok(r) => r,
177            Err(e) => {
178                tracing::warn!(error = %e, "FunctionGemma tool router init failed; disabled");
179                None
180            }
181        }
182    };
183
184    for step in 1..=max_steps {
185        tracing::info!(step = step, "Agent step starting");
186
187        super::cost_guard::enforce_cost_budget()?;
188
189        // Phase A: derive the per-step LLM context from a clone of
190        // `session.messages` rather than mutating history in place. The
191        // experimental strategies, RLM-powered context-window
192        // enforcement, and orphan-pair repair all run against the
193        // clone; the canonical transcript stays append-only.
194        let mut derived = derive_context(
195            session,
196            Arc::clone(&provider),
197            &model,
198            &system_prompt,
199            &advertised_tool_definitions,
200            None,
201            None,
202        )
203        .await?;
204
205        let proactive_lsp_message = build_proactive_lsp_context_message(
206            selected_provider,
207            step,
208            &tool_registry,
209            &session.messages,
210            &cwd,
211        )
212        .await;
213
214        let mut attempt = 0;
215        let mut upstream_retry_count: u8 = 0;
216        const MAX_UPSTREAM_RETRIES: u8 = 3;
217        let response = loop {
218            attempt += 1;
219
220            let mut messages = vec![Message {
221                role: Role::System,
222                content: vec![ContentPart::Text {
223                    text: system_prompt.clone(),
224                }],
225            }];
226            if let Some(msg) = &proactive_lsp_message {
227                messages.push(msg.clone());
228            }
229            messages.extend(derived.messages.clone());
230
231            let request = CompletionRequest {
232                messages,
233                tools: advertised_tool_definitions.clone(),
234                model: model.clone(),
235                temperature,
236                top_p: None,
237                max_tokens: Some(session_completion_max_tokens()),
238                stop: Vec::new(),
239            };
240
241            match provider.complete(request).await {
242                Ok(r) => break r,
243                Err(e) => {
244                    if attempt == 1 && is_prompt_too_long_error(&e) {
245                        tracing::warn!(error = %e, "Provider rejected prompt as too long; re-deriving with force_keep_last=6 and retrying");
246                        derived = derive_context(
247                            session,
248                            Arc::clone(&provider),
249                            &model,
250                            &system_prompt,
251                            &advertised_tool_definitions,
252                            None,
253                            Some(6),
254                        )
255                        .await?;
256                        continue;
257                    }
258                    if upstream_retry_count < MAX_UPSTREAM_RETRIES
259                        && is_retryable_upstream_error(&e)
260                    {
261                        upstream_retry_count += 1;
262                        let backoff_secs = 1u64 << (upstream_retry_count - 1).min(2);
263                        tracing::warn!(
264                            error = %e,
265                            retry = upstream_retry_count,
266                            max = MAX_UPSTREAM_RETRIES,
267                            backoff_secs,
268                            "Retryable upstream provider error; sleeping and retrying"
269                        );
270                        tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
271                        if let Some((retry_provider, retry_model)) =
272                            choose_router_target(&registry, selected_provider, &model)
273                        {
274                            tracing::info!(
275                                to_provider = %retry_provider,
276                                to_model = %retry_model,
277                                "Failing over to alternate provider/model"
278                            );
279                            session.metadata.model =
280                                Some(format!("{retry_provider}/{retry_model}"));
281                            attempt = 0;
282                        }
283                        continue;
284                    }
285                    return Err(e);
286                }
287            }
288        };
289
290        let response = if let Some(ref router) = tool_router {
291            router
292                .maybe_reformat(response, &tool_definitions, model_supports_tools)
293                .await
294        } else {
295            response
296        };
297        let response = normalize_textual_tool_calls(response, &tool_definitions);
298
299        crate::telemetry::TOKEN_USAGE.record_model_usage_with_cache(
300            &model,
301            response.usage.prompt_tokens as u64,
302            response.usage.completion_tokens as u64,
303            response.usage.cache_read_tokens.unwrap_or(0) as u64,
304            response.usage.cache_write_tokens.unwrap_or(0) as u64,
305        );
306
307        let mut truncated_tool_ids: Vec<(String, String)> = Vec::new();
308        let tool_calls: Vec<(String, String, serde_json::Value)> = response
309            .message
310            .content
311            .iter()
312            .filter_map(|part| {
313                if let ContentPart::ToolCall {
314                    id,
315                    name,
316                    arguments,
317                    ..
318                } = part
319                {
320                    match serde_json::from_str::<serde_json::Value>(arguments) {
321                        Ok(args) => Some((id.clone(), name.clone(), args)),
322                        Err(e) => {
323                            tracing::warn!(
324                                tool = %name,
325                                tool_call_id = %id,
326                                args_len = arguments.len(),
327                                error = %e,
328                                "Tool call arguments failed to parse (likely truncated by max_tokens)"
329                            );
330                            truncated_tool_ids.push((id.clone(), name.clone()));
331                            None
332                        }
333                    }
334                } else {
335                    None
336                }
337            })
338            .collect();
339
340        let assistant_text = extract_text_content(&&response.message.content);
341        if should_force_build_tool_first_retry(
342            &session.agent,
343            build_mode_tool_retry_count,
344            &tool_definitions,
345            &session.messages,
346            &cwd,
347            &assistant_text,
348            !tool_calls.is_empty(),
349            BUILD_MODE_TOOL_FIRST_MAX_RETRIES,
350        ) {
351            build_mode_tool_retry_count += 1;
352            tracing::warn!(
353                step = step,
354                agent = %session.agent,
355                retry = build_mode_tool_retry_count,
356                "Build mode tool-first guard triggered; retrying with execution nudge"
357            );
358            session.add_message(Message {
359                role: Role::User,
360                content: vec![ContentPart::Text {
361                    text: BUILD_MODE_TOOL_FIRST_NUDGE.to_string(),
362                }],
363            });
364            continue;
365        }
366        if should_retry_missing_native_tool_call(
367            selected_provider,
368            &model,
369            native_tool_promise_retry_count,
370            &tool_definitions,
371            &assistant_text,
372            !tool_calls.is_empty(),
373            NATIVE_TOOL_PROMISE_RETRY_MAX_RETRIES,
374        ) {
375            native_tool_promise_retry_count += 1;
376            tracing::warn!(
377                step = step,
378                provider = selected_provider,
379                model = %model,
380                retry = native_tool_promise_retry_count,
381                "Model described a tool step without emitting a tool call; retrying with corrective nudge"
382            );
383            session.add_message(response.message.clone());
384            session.add_message(Message {
385                role: Role::User,
386                content: vec![ContentPart::Text {
387                    text: NATIVE_TOOL_PROMISE_NUDGE.to_string(),
388                }],
389            });
390            continue;
391        }
392        if !tool_calls.is_empty() {
393            build_mode_tool_retry_count = 0;
394            native_tool_promise_retry_count = 0;
395        } else if is_build_agent(&session.agent)
396            && build_request_requires_tool(&session.messages, &cwd)
397            && build_mode_tool_retry_count >= BUILD_MODE_TOOL_FIRST_MAX_RETRIES
398        {
399            return Err(anyhow::anyhow!(
400                "Build mode could not obtain tool calls for an explicit file-change request after {} retries. \
401                 Switch to a tool-capable model and try again.",
402                BUILD_MODE_TOOL_FIRST_MAX_RETRIES
403            ));
404        }
405
406        let mut step_text = String::new();
407
408        for part in &response.message.content {
409            match part {
410                ContentPart::Text { text } if !text.is_empty() => {
411                    step_text.push_str(text);
412                    step_text.push('\n');
413                }
414                ContentPart::Thinking { text } if !text.is_empty() => {
415                    if let Some(ref bus) = session.bus {
416                        let handle = bus.handle(&session.agent);
417                        handle.send_with_correlation(
418                            format!("agent.{}.thinking", session.agent),
419                            crate::bus::BusMessage::AgentThinking {
420                                agent_id: session.agent.clone(),
421                                thinking: text.clone(),
422                                step,
423                            },
424                            Some(turn_id.clone()),
425                        );
426                    }
427                }
428                _ => {}
429            }
430        }
431
432        if !step_text.trim().is_empty() {
433            final_output.push_str(&step_text);
434        }
435
436        if tool_calls.is_empty() && truncated_tool_ids.is_empty() {
437            session.add_message(response.message.clone());
438            if is_build_agent(&session.agent) {
439                if let Some(report) =
440                    build_validation_report(&cwd, &touched_files, &baseline_git_dirty_files).await?
441                {
442                    validation_retry_count += 1;
443                    tracing::warn!(
444                        retries = validation_retry_count,
445                        issues = report.issue_count,
446                        "Post-edit validation found unresolved diagnostics"
447                    );
448                    if validation_retry_count >= POST_EDIT_VALIDATION_MAX_RETRIES {
449                        return Err(anyhow::anyhow!(
450                            "Post-edit validation failed after {} attempts.\n\n{}",
451                            POST_EDIT_VALIDATION_MAX_RETRIES,
452                            report.prompt
453                        ));
454                    }
455                    session.add_message(Message {
456                        role: Role::User,
457                        content: vec![ContentPart::Text {
458                            text: report.prompt,
459                        }],
460                    });
461                    final_output.clear();
462                    continue;
463                }
464            }
465            break;
466        }
467
468        if !truncated_tool_ids.is_empty() {
469            if tool_calls.is_empty() {
470                session.add_message(response.message.clone());
471            }
472            for (tool_id, tool_name) in &truncated_tool_ids {
473                let error_content = format!(
474                    "Error: Your tool call to `{tool_name}` was truncated — the arguments \
475                     JSON was cut off mid-string (likely hit the max_tokens limit). \
476                     Please retry with a shorter approach: use the `write` tool to write \
477                     content in smaller pieces, or reduce the size of your arguments."
478                );
479                session.add_message(Message {
480                    role: Role::Tool,
481                    content: vec![ContentPart::ToolResult {
482                        tool_call_id: tool_id.clone(),
483                        content: error_content,
484                    }],
485                });
486            }
487            if tool_calls.is_empty() {
488                continue;
489            }
490        }
491
492        {
493            let mut sigs: Vec<String> = tool_calls
494                .iter()
495                .map(|(_, name, args)| format!("{name}:{args}"))
496                .collect();
497            sigs.sort();
498            let sig = sigs.join("|");
499
500            if last_tool_sig.as_deref() == Some(&sig) {
501                consecutive_same_tool += 1;
502            } else {
503                consecutive_same_tool = 1;
504                last_tool_sig = Some(sig);
505            }
506
507            let force_answer = consecutive_same_tool > MAX_CONSECUTIVE_SAME_TOOL
508                || (!model_supports_tools && step >= 3);
509
510            if force_answer {
511                tracing::warn!(
512                    step = step,
513                    consecutive = consecutive_same_tool,
514                    "Breaking agent loop: forcing final answer",
515                );
516                let mut nudge_msg = response.message.clone();
517                nudge_msg
518                    .content
519                    .retain(|p| !matches!(p, ContentPart::ToolCall { .. }));
520                if !nudge_msg.content.is_empty() {
521                    session.add_message(nudge_msg);
522                }
523                session.add_message(Message {
524                    role: Role::User,
525                    content: vec![ContentPart::Text {
526                        text: FORCE_FINAL_ANSWER_NUDGE.to_string(),
527                    }],
528                });
529                continue;
530            }
531        }
532
533        session.add_message(response.message.clone());
534
535        tracing::info!(
536            step = step,
537            num_tools = tool_calls.len(),
538            "Executing tool calls"
539        );
540
541        let mut codesearch_thrash_guard_triggered = false;
542        for (tool_id, tool_name, tool_input) in tool_calls {
543            let (tool_name, tool_input) =
544                normalize_tool_call_for_execution(&tool_name, &tool_input);
545            tracing::info!(tool = %tool_name, tool_id = %tool_id, "Executing tool");
546
547            if tool_name == "list_tools" {
548                let content = list_tools_bootstrap_output(&tool_definitions, &tool_input);
549                session.add_message(Message {
550                    role: Role::Tool,
551                    content: vec![ContentPart::ToolResult {
552                        tool_call_id: tool_id,
553                        content,
554                    }],
555                });
556                continue;
557            }
558
559            if let Some(ref bus) = session.bus {
560                let handle = bus.handle(&session.agent);
561                handle.send_with_correlation(
562                    format!("agent.{}.tool.request", session.agent),
563                    crate::bus::BusMessage::ToolRequest {
564                        request_id: tool_id.clone(),
565                        agent_id: session.agent.clone(),
566                        tool_name: tool_name.clone(),
567                        arguments: tool_input.clone(),
568                        step,
569                    },
570                    Some(turn_id.clone()),
571                );
572            }
573
574            if is_interactive_tool(&tool_name) {
575                tracing::warn!(tool = %tool_name, "Blocking interactive tool in session loop");
576                session.add_message(Message {
577                    role: Role::Tool,
578                    content: vec![ContentPart::ToolResult {
579                        tool_call_id: tool_id,
580                        content: "Error: Interactive tool 'question' is disabled in this interface. Ask the user directly in assistant text.".to_string(),
581                    }],
582                });
583                continue;
584            }
585
586            if let Some(reason) = detect_stub_in_tool_input(&tool_name, &tool_input) {
587                tracing::warn!(tool = %tool_name, reason = %reason, "Blocking suspected stubbed edit");
588                session.add_message(Message {
589                    role: Role::Tool,
590                    content: vec![ContentPart::ToolResult {
591                        tool_call_id: tool_id,
592                        content: format!(
593                            "Error: Refactor guard rejected this edit: {reason}. \
594                             Provide concrete, behavior-preserving implementation (no placeholders/stubs)."
595                        ),
596                    }],
597                });
598                continue;
599            }
600
601            let exec_start = std::time::Instant::now();
602            let exec_input = enrich_tool_input_with_runtime_context(
603                &tool_input,
604                &cwd,
605                session.metadata.model.as_deref(),
606                &session.id,
607                &session.agent,
608                session.metadata.provenance.as_ref(),
609            );
610            let (content, success, tool_metadata) = execute_tool(
611                &tool_registry,
612                &tool_name,
613                &exec_input,
614                &session.id,
615                exec_start,
616            )
617            .await;
618
619            let requires_confirmation = tool_result_requires_confirmation(tool_metadata.as_ref());
620            let (content, success, tool_metadata, requires_confirmation) = if requires_confirmation
621                && session.metadata.auto_apply_edits
622            {
623                let preview_content = content.clone();
624                match auto_apply_pending_confirmation(
625                    &tool_name,
626                    &exec_input,
627                    tool_metadata.as_ref(),
628                )
629                .await
630                {
631                    Ok(Some((content, success, tool_metadata))) => {
632                        tracing::info!(
633                            tool = %tool_name,
634                            "Auto-applied pending confirmation in TUI session"
635                        );
636                        (content, success, tool_metadata, false)
637                    }
638                    Ok(None) => (content, success, tool_metadata, true),
639                    Err(error) => (
640                        format!(
641                            "{}\n\nTUI edit auto-apply failed: {}",
642                            pending_confirmation_tool_result_content(&tool_name, &preview_content,),
643                            error
644                        ),
645                        false,
646                        tool_metadata,
647                        true,
648                    ),
649                }
650            } else {
651                (content, success, tool_metadata, requires_confirmation)
652            };
653            let rendered_content = if requires_confirmation {
654                pending_confirmation_tool_result_content(&tool_name, &content)
655            } else {
656                content.clone()
657            };
658
659            if !requires_confirmation {
660                track_touched_files(
661                    &mut touched_files,
662                    &cwd,
663                    &tool_name,
664                    &tool_input,
665                    tool_metadata.as_ref(),
666                );
667            }
668
669            let duration_ms = exec_start.elapsed().as_millis() as u64;
670            let codesearch_no_match =
671                is_codesearch_no_match_output(&tool_name, success, &rendered_content);
672
673            if let Some(ref bus) = session.bus {
674                let handle = bus.handle(&session.agent);
675                handle.send_with_correlation(
676                    format!("agent.{}.tool.response", session.agent),
677                    crate::bus::BusMessage::ToolResponse {
678                        request_id: tool_id.clone(),
679                        agent_id: session.agent.clone(),
680                        tool_name: tool_name.clone(),
681                        result: rendered_content.clone(),
682                        success,
683                        step,
684                    },
685                    Some(turn_id.clone()),
686                );
687                handle.send_with_correlation(
688                    format!("agent.{}.tool.output", session.agent),
689                    crate::bus::BusMessage::ToolOutputFull {
690                        agent_id: session.agent.clone(),
691                        tool_name: tool_name.clone(),
692                        output: rendered_content.clone(),
693                        success,
694                        step,
695                    },
696                    Some(turn_id.clone()),
697                );
698            }
699
700            if let Some(base_dir) = super::archive::event_stream_path() {
701                write_tool_event_file(
702                    &base_dir,
703                    &session.id,
704                    &tool_name,
705                    success,
706                    duration_ms,
707                    &rendered_content,
708                    session.messages.len() as u64,
709                );
710            }
711
712            let content = maybe_route_through_rlm(
713                &rendered_content,
714                &tool_name,
715                &tool_input,
716                &tool_id,
717                &session.id,
718                &session.messages,
719                &model,
720                Arc::clone(&provider),
721                &session.metadata.rlm,
722            )
723            .await;
724
725            session.add_message(Message {
726                role: Role::Tool,
727                content: vec![ContentPart::ToolResult {
728                    tool_call_id: tool_id,
729                    content,
730                }],
731            });
732
733            if is_build_agent(&session.agent) {
734                if codesearch_no_match {
735                    consecutive_codesearch_no_matches += 1;
736                } else {
737                    consecutive_codesearch_no_matches = 0;
738                }
739
740                if consecutive_codesearch_no_matches >= MAX_CONSECUTIVE_CODESEARCH_NO_MATCHES {
741                    tracing::warn!(
742                        step = step,
743                        consecutive_codesearch_no_matches = consecutive_codesearch_no_matches,
744                        "Detected codesearch no-match thrash; nudging model to stop variant retries",
745                    );
746                    session.add_message(Message {
747                        role: Role::User,
748                        content: vec![ContentPart::Text {
749                            text: CODESEARCH_THRASH_NUDGE.to_string(),
750                        }],
751                    });
752                    codesearch_thrash_guard_triggered = true;
753                    break;
754                }
755            }
756        }
757
758        if codesearch_thrash_guard_triggered {
759            continue;
760        }
761    }
762
763    session.save().await?;
764
765    super::archive::archive_event_stream_to_s3(&session.id, super::archive::event_stream_path())
766        .await;
767
768    Ok(SessionResult {
769        text: final_output.trim().to_string(),
770        session_id: session.id.clone(),
771    })
772}
773
774/// Split the session's configured model string into `(provider, model_id)`.
775fn parse_session_model_selector(session: &Session, providers: &[&str]) -> (Option<String>, String) {
776    let Some(ref model_str) = session.metadata.model else {
777        return (None, String::new());
778    };
779    let (prov, model) = parse_model_string(model_str);
780    let prov = prov.map(|p| match p {
781        "zhipuai" | "z-ai" => "zai",
782        other => other,
783    });
784    if prov.is_some() {
785        (prov.map(|s| s.to_string()), model.to_string())
786    } else if providers.contains(&model) {
787        (Some(model.to_string()), String::new())
788    } else {
789        (None, model.to_string())
790    }
791}
792
793/// Execute a tool call and emit the corresponding audit entry.
794async fn execute_tool(
795    tool_registry: &ToolRegistry,
796    tool_name: &str,
797    exec_input: &serde_json::Value,
798    session_id: &str,
799    exec_start: std::time::Instant,
800) -> (
801    String,
802    bool,
803    Option<std::collections::HashMap<String, serde_json::Value>>,
804) {
805    if let Some(tool) = tool_registry.get(tool_name) {
806        match tool.execute(exec_input.clone()).await {
807            Ok(result) => {
808                let duration_ms = exec_start.elapsed().as_millis() as u64;
809                tracing::info!(tool = %tool_name, success = result.success, "Tool execution completed");
810                if let Some(audit) = try_audit_log() {
811                    audit
812                        .log_with_correlation(
813                            AuditCategory::ToolExecution,
814                            format!("tool:{}", tool_name),
815                            if result.success {
816                                AuditOutcome::Success
817                            } else {
818                                AuditOutcome::Failure
819                            },
820                            None,
821                            Some(json!({
822                                "duration_ms": duration_ms,
823                                "output_len": result.output.len()
824                            })),
825                            None,
826                            None,
827                            None,
828                            Some(session_id.to_string()),
829                        )
830                        .await;
831                }
832                (result.output, result.success, Some(result.metadata))
833            }
834            Err(e) => {
835                let duration_ms = exec_start.elapsed().as_millis() as u64;
836                tracing::warn!(tool = %tool_name, error = %e, "Tool execution failed");
837                if let Some(audit) = try_audit_log() {
838                    audit
839                        .log_with_correlation(
840                            AuditCategory::ToolExecution,
841                            format!("tool:{}", tool_name),
842                            AuditOutcome::Failure,
843                            None,
844                            Some(json!({ "duration_ms": duration_ms, "error": e.to_string() })),
845                            None,
846                            None,
847                            None,
848                            Some(session_id.to_string()),
849                        )
850                        .await;
851                }
852                (format!("Error: {}", e), false, None)
853            }
854        }
855    } else {
856        tracing::warn!(tool = %tool_name, "Tool not found");
857        if let Some(audit) = try_audit_log() {
858            audit
859                .log_with_correlation(
860                    AuditCategory::ToolExecution,
861                    format!("tool:{}", tool_name),
862                    AuditOutcome::Failure,
863                    None,
864                    Some(json!({ "error": "unknown_tool" })),
865                    None,
866                    None,
867                    None,
868                    Some(session_id.to_string()),
869                )
870                .await;
871        }
872        (format!("Error: Unknown tool '{}'", tool_name), false, None)
873    }
874}
875
876/// Write a [`ChatEvent::tool_result`] JSONL record to disk (fire-and-forget).
877fn write_tool_event_file(
878    base_dir: &std::path::Path,
879    session_id: &str,
880    tool_name: &str,
881    success: bool,
882    duration_ms: u64,
883    rendered_content: &str,
884    seq: u64,
885) {
886    let workspace = std::env::var("PWD")
887        .map(std::path::PathBuf::from)
888        .unwrap_or_else(|_| std::env::current_dir().unwrap_or_default());
889    let event = ChatEvent::tool_result(
890        workspace,
891        session_id.to_string(),
892        tool_name,
893        success,
894        duration_ms,
895        rendered_content,
896        seq,
897    );
898    let event_json = event.to_json();
899    let timestamp = Utc::now().format("%Y%m%dT%H%M%SZ");
900    let filename = format!(
901        "{}-chat-events-{:020}-{:020}.jsonl",
902        timestamp,
903        seq * 10000,
904        (seq + 1) * 10000
905    );
906    let event_path = base_dir.join(session_id).join(filename);
907
908    tokio::spawn(async move {
909        if let Some(parent) = event_path.parent() {
910            let _ = tokio::fs::create_dir_all(parent).await;
911        }
912        if let Ok(mut file) = tokio::fs::OpenOptions::new()
913            .create(true)
914            .append(true)
915            .open(&event_path)
916            .await
917        {
918            use tokio::io::AsyncWriteExt;
919            let _ = file.write_all(event_json.as_bytes()).await;
920            let _ = file.write_all(b"\n").await;
921        }
922    });
923}
924
925/// Route a large tool output through the Recursive Language Model when it
926/// exceeds the routing heuristics.
927async fn maybe_route_through_rlm(
928    rendered_content: &str,
929    tool_name: &str,
930    tool_input: &serde_json::Value,
931    tool_id: &str,
932    session_id: &str,
933    messages: &[Message],
934    model: &str,
935    provider: Arc<dyn crate::provider::Provider>,
936    rlm_config: &RlmConfig,
937) -> String {
938    let ctx_window = context_window_for_model(model);
939    let current_tokens = estimate_tokens_for_messages(messages);
940    let routing_ctx = RoutingContext {
941        tool_id: tool_name.to_string(),
942        session_id: session_id.to_string(),
943        call_id: Some(tool_id.to_string()),
944        model_context_limit: ctx_window,
945        current_context_tokens: Some(current_tokens),
946    };
947    let routing = RlmRouter::should_route(rendered_content, &routing_ctx, rlm_config);
948    if !routing.should_route {
949        return rendered_content.to_string();
950    }
951
952    tracing::info!(
953        tool = %tool_name,
954        reason = %routing.reason,
955        estimated_tokens = routing.estimated_tokens,
956        "RLM: Routing large tool output"
957    );
958    let auto_ctx = AutoProcessContext {
959        tool_id: tool_name,
960        tool_args: tool_input.clone(),
961        session_id,
962        abort: None,
963        on_progress: None,
964        provider: Arc::clone(&provider),
965        model: model.to_string(),
966        bus: None,
967        trace_id: None,
968        subcall_provider: None,
969        subcall_model: None,
970    };
971    let original_bytes = rendered_content.len();
972    match RlmRouter::auto_process(rendered_content, auto_ctx, &rlm_config).await {
973        Ok(result) => {
974            tracing::info!(
975                input_tokens = result.stats.input_tokens,
976                output_tokens = result.stats.output_tokens,
977                iterations = result.stats.iterations,
978                "RLM: Processing complete"
979            );
980            format!(
981                "[RLM-SUMMARY tool={tool_name} original_bytes={original_bytes} reason={reason}]\n{body}\n[END RLM-SUMMARY — the raw tool output was replaced by this model-generated summary; re-running the same call will produce a similar summary, not the original bytes]",
982                reason = routing.reason,
983                body = result.processed,
984            )
985        }
986        Err(e) => {
987            tracing::warn!(error = %e, "RLM: auto_process failed, using smart_truncate");
988            let (truncated, _, _) =
989                RlmRouter::smart_truncate(rendered_content, tool_name, tool_input, ctx_window / 4);
990            truncated
991        }
992    }
993}
codetether_agent/session/helper/prompt.rs

codetether_agent/session/helper/
prompt.rs