Skip to main content

ras_agent/application/
run_step.rs

1use std::sync::Arc;
2use std::time::Instant;
3
4use chrono::Utc;
5use ras_cdp::BrowserPort;
6use ras_dom::DomExtractor;
7use ras_errors::AppError;
8use ras_events::EventBus;
9use ras_llm::{ChatMessage, ChatResponse, InvokeOptions, LlmClient};
10use ras_tools::domain::registry::{ActionRegistry, ToolContext};
11use ras_types::{ActionResult, StepId};
12use url::Url;
13
14use crate::application::compute_action_hash::compute_action_hash;
15use crate::application::detect_loop::{build_budget_warning, build_loop_nudge};
16use crate::application::fallback_llm::should_switch_to_fallback;
17use crate::application::parse_output::parse_agent_output;
18use crate::domain::agent_history::StepRecord;
19use crate::domain::loop_detector::ActionLoopDetector;
20use crate::domain::step_metadata::StepMetadata;
21
22pub struct RunStep {
23    primary_llm: Arc<dyn LlmClient>,
24    fallback_llm: Option<Arc<dyn LlmClient>>,
25    registry: Arc<ActionRegistry>,
26    browser: Arc<dyn BrowserPort>,
27    events: Arc<dyn EventBus>,
28    dom_extractor: Option<Arc<dyn DomExtractor>>,
29}
30
31impl RunStep {
32    #[must_use]
33    pub fn new(
34        primary: Arc<dyn LlmClient>,
35        fallback: Option<Arc<dyn LlmClient>>,
36        registry: Arc<ActionRegistry>,
37        browser: Arc<dyn BrowserPort>,
38        events: Arc<dyn EventBus>,
39        dom_extractor: Option<Arc<dyn DomExtractor>>,
40    ) -> Self {
41        Self {
42            primary_llm: primary,
43            fallback_llm: fallback,
44            registry,
45            browser,
46            events,
47            dom_extractor,
48        }
49    }
50
51    pub async fn execute(
52        &self,
53        step: StepId,
54        max_steps: u32,
55        prompt: Vec<ChatMessage>,
56        detector: &mut ActionLoopDetector,
57    ) -> Result<StepRecord, AppError> {
58        let started = Instant::now();
59        let mut messages = prompt;
60        if let Some(nudge) = build_loop_nudge(detector) {
61            messages.push(nudge);
62        }
63        if let Some(warn) = build_budget_warning(step.0, max_steps) {
64            messages.push(warn);
65        }
66        let response = self.invoke_with_fallback(messages).await?;
67        let output = parse_agent_output(&response)?;
68
69        let target = self.browser.focused_target().await.ok();
70        let page_url = match &target {
71            Some(t) => self
72                .browser
73                .evaluate(t, "location.href")
74                .await
75                .ok()
76                .and_then(|v| v.as_str().and_then(|s| Url::parse(s).ok())),
77            None => None,
78        };
79
80        let pre_clickables: Arc<Vec<ras_dom::ClickableElement>> =
81            match (&self.dom_extractor, &target) {
82                (Some(extractor), Some(t)) => extractor
83                    .snapshot(t)
84                    .await
85                    .ok()
86                    .map(|s| Arc::new(s.clickables))
87                    .unwrap_or_else(|| Arc::new(Vec::new())),
88                _ => Arc::new(Vec::new()),
89            };
90
91        let mut results = Vec::new();
92        for action in &output.action {
93            detector.record_action(compute_action_hash(action));
94            let Some(reg) = self.registry.get(&action.name) else {
95                results.push(ActionResult::err(format!(
96                    "unknown action: {}",
97                    action.name.0
98                )));
99                break;
100            };
101            let ctx = ToolContext {
102                browser: self.browser.clone(),
103                events: self.events.clone(),
104                page_url: page_url.clone(),
105                available_files: Vec::new(),
106                clickables: pre_clickables.clone(),
107            };
108            match reg.handler.execute(action.parameters.clone(), ctx).await {
109                Ok(r) => {
110                    let terminates = reg.metadata.terminates_sequence;
111                    let is_done = r.is_done;
112                    let is_err = r.is_error();
113                    results.push(r);
114                    if terminates || is_done || is_err {
115                        break;
116                    }
117                }
118                Err(e) => {
119                    results.push(ActionResult::err(e.to_string()));
120                    break;
121                }
122            }
123        }
124
125        let summary = match (&self.dom_extractor, &target) {
126            (Some(extractor), Some(t)) => match extractor.snapshot(t).await {
127                Ok(s) => Some(s),
128                Err(e) => {
129                    tracing::warn!(error = %e, "dom snapshot failed; continuing without grounding");
130                    None
131                }
132            },
133            _ => None,
134        };
135
136        let metadata = StepMetadata {
137            duration_ms: started.elapsed().as_millis() as u64,
138            step_interval_ms: None,
139            usage: response.usage,
140            model: Some(response.model.clone()),
141            fallback_used: false,
142        };
143        Ok(StepRecord {
144            step,
145            started_at: Utc::now(),
146            url: page_url,
147            output,
148            results,
149            metadata,
150            summary,
151        })
152    }
153
154    async fn invoke_with_fallback(
155        &self,
156        messages: Vec<ChatMessage>,
157    ) -> Result<ChatResponse, AppError> {
158        let opts = InvokeOptions::default();
159        match self
160            .primary_llm
161            .ainvoke(messages.clone(), opts.clone())
162            .await
163        {
164            Ok(r) => Ok(r),
165            Err(e) if should_switch_to_fallback(&e) => match &self.fallback_llm {
166                Some(fb) => fb.ainvoke(messages, opts).await,
167                None => Err(e),
168            },
169            Err(e) => Err(e),
170        }
171    }
172}
173
174#[must_use]
175pub fn done_result(text: impl Into<String>) -> ActionResult {
176    ActionResult::done(text)
177}