1use anyhow::{anyhow, Result};
2use async_openai::{
3 types::{ChatCompletionRequestMessage, CreateChatCompletionRequestArgs, Role},
4 Client,
5};
6use indoc::formatdoc;
7use tracing::debug;
8use url::Url;
9
10use crate::Action;
11
12#[derive(Debug)]
14pub struct Conversation {
15 goal: String,
17 client: Client,
19 url: Option<Url>,
21 messages: Vec<ChatCompletionRequestMessage>,
23}
24
25impl Conversation {
26 #[must_use]
28 pub fn new(goal: String) -> Self {
29 Self {
30 goal,
31 url: None,
32 client: Client::new(),
33 messages: vec![ChatCompletionRequestMessage {
34 name: None,
35 role: Role::System,
36 content: formatdoc!("
37 You are an agent controlling a browser. You are given an objective that you are trying to achieve, the URL of the current website, and a simplified markup description of the page contents, which looks like this:
38 <p id=0>text</p>
39 <link id=1 href=\"link url\">text</link>
40 <button id=2>text</button>
41 <input id=3>placeholder</input>
42 <img id=4 alt=\"image description\"/>
43
44 You must respond with ONLY one of the following commands AND NOTHING ELSE:
45 - CLICK X - click on a given element. You can only click on links, buttons, and inputs!
46 - TYPE X \"TEXT\" - type the specified text into the input with id X and press ENTER
47 - ANSWER \"TEXT\" - Respond to the user with the specified text once you have completed the objective
48 "),
49 }]}
50 }
51
52 #[tracing::instrument]
54 pub async fn request_action(&mut self, url: &str, page_content: &str) -> Result<Action> {
55 self.enforce_context_length(url)?;
56
57 self.messages.push(ChatCompletionRequestMessage {
58 name: None,
59 role: Role::User,
60 content: format!(
61 "OBJECTIVE: {}\nCURRENT URL: {url}\nPAGE CONTENT: {page_content}",
62 self.goal
63 ),
64 });
65
66 let response = self
67 .client
68 .chat()
69 .create(
70 CreateChatCompletionRequestArgs::default()
71 .model("gpt-4")
72 .temperature(0.7f32)
73 .max_tokens(100u16)
74 .messages(self.messages.clone())
75 .build()?,
76 )
77 .await?;
78
79 debug!(
80 "Got a response, used {} tokens.",
81 response
82 .usage
83 .expect("Usage should be present.")
84 .total_tokens
85 );
86
87 let message = &response
88 .choices
89 .get(0)
90 .ok_or_else(|| anyhow!("No choices returned from OpenAI.",))?
91 .message;
92
93 self.messages.push(ChatCompletionRequestMessage {
94 name: None,
95 role: message.role.clone(),
96 content: message.content.clone(),
97 });
98
99 message.content.clone().try_into()
100 }
101
102 fn enforce_context_length(&mut self, url: &str) -> Result<()> {
103 let new_url = Url::parse(url)?;
104
105 if self.url.as_ref().map(Url::host) != Some(new_url.host()) {
106 debug!("Host changed, clearing context.");
107 self.messages = self.messages.drain(..1).collect();
108 }
109
110 self.url = Some(new_url);
111 Ok(())
112 }
113}