use crate::agents::agent::AgentGPT;
#[cfg(feature = "net")]
use crate::collaboration::Collaborator;
use crate::common::utils::{
Capability, ClientType, Communication, ContextManager, Knowledge, Persona, Planner, Reflection,
Status, Task, TaskScheduler, Tool, similarity,
};
#[allow(unused)]
use crate::prompts::designer::{IMGGET_PROMPT, WEB_DESIGNER_PROMPT};
use crate::traits::agent::Agent;
use crate::traits::functions::{AsyncFunctions, Executor, Functions};
use anyhow::{Result, anyhow};
use async_trait::async_trait;
use auto_derive::Auto;
use colored::*;
#[cfg(feature = "img")]
use getimg::client::Client as ImgClient;
#[cfg(feature = "img")]
use getimg::utils::save_image;
use std::borrow::Cow;
use std::env::var;
use tokio::fs;
use tracing::{debug, error, info};
#[cfg(feature = "mem")]
use {
crate::common::memory::load_long_term_memory, crate::common::memory::long_term_memory_context,
crate::common::memory::save_long_term_memory,
};
#[cfg(feature = "oai")]
use {openai_dive::v1::models::FlagshipModel, openai_dive::v1::resources::chat::*};
#[cfg(feature = "gem")]
use gems::{
chat::ChatBuilder,
imagen::ImageGenBuilder,
messages::{Content, Message},
models::Model,
stream::StreamBuilder,
traits::CTrait,
utils::load_and_encode_image,
vision::VisionBuilder,
};
#[cfg(any(feature = "oai", feature = "gem", feature = "cld", feature = "xai"))]
use crate::traits::functions::ReqResponse;
#[cfg(feature = "xai")]
use x_ai::{
chat_compl::{ChatCompletionsRequestBuilder, Message as XaiMessage},
traits::ChatCompletionsFetcher,
};
#[cfg(feature = "cld")]
use anthropic_ai_sdk::types::message::{
ContentBlock, CreateMessageParams, Message as AnthMessage, MessageClient,
RequiredMessageParams, Role,
};
use derivative::Derivative;
/// Agent specialized in visual design tasks: generating an image from a
/// textual task description and generating a textual description from an
/// image, via whichever AI backend feature is enabled.
#[derive(Derivative, Auto)]
#[derivative(Debug, Clone)]
pub struct DesignerGPT {
/// Directory where generated artifacts (e.g. `img.jpg`) are written;
/// derived from `AUTOGPT_WORKSPACE` in `new`.
workspace: Cow<'static, str>,
/// Underlying agent state: position, status, and communication history.
agent: AgentGPT,
/// getimg.ai HTTP client; present only when the `img` feature is enabled.
#[cfg(feature = "img")]
img_client: ImgClient,
/// Backend AI client (Gemini / OpenAI / Claude / xAI), chosen from the
/// environment via `ClientType::from_env()` in `new`.
client: ClientType,
}
impl Default for DesignerGPT {
/// Builds a placeholder instance: a `"default_workspace"` path, default
/// agent state, and (with the `img` feature) a getimg client constructed
/// with dummy `"default"` credentials. Real values are read from the
/// environment in `DesignerGPT::new`.
fn default() -> Self {
Self {
workspace: Cow::Borrowed("default_workspace"),
agent: AgentGPT::default(),
// Dummy key/model; this client cannot make real API calls.
#[cfg(feature = "img")]
img_client: ImgClient::new("default", "default"),
client: ClientType::default(),
}
}
}
impl DesignerGPT {
/// Creates a new `DesignerGPT` agent and ensures its workspace exists.
///
/// The workspace path is `AUTOGPT_WORKSPACE` (default `"workspace/"`) with
/// `"designer"` appended; the directory is created if missing. With the
/// `img` feature, a getimg.ai client is built from the `GETIMG_API_KEY`
/// and `GETIMG__MODEL` environment variables. The backend AI client is
/// selected via `ClientType::from_env()`.
///
/// # Arguments
/// * `objective` - the agent's objective text.
/// * `position` - the agent's position/role label (also becomes its id).
#[allow(unreachable_code)]
pub async fn new(objective: &'static str, position: &'static str) -> Self {
    // `unwrap_or_else` defers the default-string allocation to the error
    // path; the previous `.to_owned()` on an already-owned `String` was
    // redundant.
    let workspace =
        var("AUTOGPT_WORKSPACE").unwrap_or_else(|_| "workspace/".to_string()) + "designer";
    if !fs::try_exists(&workspace).await.unwrap_or(false) {
        match fs::create_dir_all(&workspace).await {
            Ok(_) => debug!("Directory '{}' created successfully!", workspace),
            // Creation failure is logged but not fatal; later writes will
            // surface the error to the caller.
            Err(e) => error!("Error creating directory '{}': {}", workspace, e),
        }
    } else {
        debug!("Workspace directory '{}' already exists.", workspace);
    }
    let mut agent: AgentGPT = AgentGPT::new_borrowed(objective, position);
    agent.id = agent.position().to_string().into();
    #[cfg(feature = "img")]
    let getimg_api_key = var("GETIMG_API_KEY").unwrap_or_default();
    // NOTE(review): `GETIMG__MODEL` (double underscore) looks like a typo
    // for `GETIMG_MODEL`; kept as-is because renaming the env var would
    // change runtime behavior — confirm before fixing.
    #[cfg(feature = "img")]
    let getimg_model =
        var("GETIMG__MODEL").unwrap_or_else(|_| "lcm-realistic-vision-v5-1".to_string());
    #[cfg(feature = "img")]
    let img_client = ImgClient::new(&getimg_api_key, &getimg_model);
    let client = ClientType::from_env();
    info!(
        "{}",
        format!("[*] {:?}: 🛠️ Getting ready!", agent.position(),)
            .bright_white()
            .bold()
    );
    Self {
        workspace: workspace.into(),
        agent,
        #[cfg(feature = "img")]
        img_client,
        client,
    }
}
pub async fn generate_image_from_text(&mut self, tasks: &Task) -> Result<()> {
let img_path = self.workspace.to_string() + "/img.jpg";
let text_prompt: String = format!("{IMGGET_PROMPT}\n\nUser Prompt: {}", tasks.description);
#[allow(unused)]
let negative_prompt = Some("Disfigured, cartoon, blurry");
self.agent.add_communication(Communication {
role: Cow::Borrowed("user"),
content: tasks.description.clone(),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("user"),
content: tasks.description.clone(),
})
.await;
}
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!("Generating image with prompt: '{text_prompt}'")),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!("Generating image with prompt: '{text_prompt}'")),
})
.await;
}
#[allow(unused)]
#[cfg(feature = "img")]
let mut text_response = String::new();
#[cfg(feature = "img")]
save_image(&text_response, &img_path).unwrap();
#[allow(unused)]
let mut image_data = vec![0];
#[cfg(any(feature = "oai", feature = "gem", feature = "cld", feature = "xai"))]
{
image_data = self.imagen(&text_prompt).await?;
}
std::fs::write(&img_path, &image_data)?;
self.agent.add_communication(Communication {
role: Cow::Borrowed("system"),
content: Cow::Owned(format!("Image saved at {img_path}")),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("system"),
content: Cow::Owned(format!("Image saved at {img_path}")),
})
.await;
}
info!(
"[*] {:?}: Image saved at {}",
self.agent.position(),
img_path
);
Ok(())
}
/// Generates a textual description of the image at `image_path` using the
/// configured backend (Gemini vision, OpenAI chat, or xAI chat completion).
///
/// Every request, result, and error is appended to the agent's
/// communication log; with the `mem` feature, each entry is also persisted
/// to long-term memory on a best-effort basis (results ignored).
///
/// # Errors
/// Returns an error if request building fails, if the xAI call fails, or
/// if no backend feature is enabled. Gemini/OpenAI call failures are
/// recorded and yield an empty `String` instead of an `Err` — note the
/// inconsistency with the xAI arm.
pub async fn generate_text_from_image(&mut self, image_path: &str) -> Result<String> {
// Record the caller's request in the conversation history.
self.agent.add_communication(Communication {
role: Cow::Borrowed("user"),
content: Cow::Owned(format!(
"Requesting text generation from image at path: {image_path}"
)),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("user"),
content: Cow::Owned(format!(
"Requesting text generation from image at path: {image_path}"
)),
})
.await;
}
// Base64-encoded image payload; populated only under the `gem` feature.
#[allow(unused)]
let mut base64_image_data = String::new();
#[cfg(feature = "gem")]
{
base64_image_data = match load_and_encode_image(image_path) {
Ok(data) => data,
Err(_) => {
// NOTE(review): a load/encode failure returns `Ok("")` rather than an
// `Err`, so callers cannot distinguish "no description" from
// "unreadable image" — confirm this is intended.
let error_msg = format!("Failed to load or encode image at path: {image_path}");
self.agent.add_communication(Communication {
role: Cow::Borrowed("system"),
content: Cow::Owned(error_msg.clone()),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("system"),
content: Cow::Owned(error_msg.clone()),
})
.await;
}
debug!("[*] {:?}: Error loading image!", self.agent.position());
return Ok("".to_string());
}
};
}
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned("Generating description from uploaded image...".to_string()),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(
"Generating description from uploaded image...".to_string(),
),
})
.await;
}
// Dispatch on whichever backend client was selected from the environment.
let response: String = match &mut self.client {
#[cfg(feature = "gem")]
ClientType::Gemini(gem_client) => {
// The designer prompt goes in as the user message; the raw base64
// image is attached via a `Message::Tool` content slot.
let params = VisionBuilder::default()
.input(Message::User {
content: Content::Text(WEB_DESIGNER_PROMPT.to_string()),
name: None,
})
.image(Message::Tool {
content: base64_image_data,
})
.build()?;
let result = gem_client.vision().generate(params).await;
match result {
Ok(response) => {
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!("Generated image description: {response}")),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!(
"Generated image description: {response}"
)),
})
.await;
}
debug!(
"[*] {:?}: Got Image Description: {:?}",
self.agent.position(),
response
);
response
}
Err(err) => {
// Failure is logged and swallowed: an empty string is returned.
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!(
"Error generating image description: {err}"
)),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!(
"Error generating image description: {err}"
)),
})
.await;
}
Default::default()
}
}
}
#[cfg(feature = "oai")]
ClientType::OpenAI(oai_client) => {
// NOTE(review): `base64_image_data` is filled only under the `gem`
// feature, so with `oai` alone the `image_url` sent here is an empty
// string — confirm this is intended.
let parameters = ChatCompletionParametersBuilder::default()
.model(FlagshipModel::Gpt4O.to_string())
.messages(vec![
ChatMessage::User {
content: ChatMessageContent::Text("What is in this image?".to_string()),
name: None,
},
ChatMessage::User {
content: ChatMessageContent::ContentPart(vec![
ChatMessageContentPart::Image(ChatMessageImageContentPart {
r#type: "image_url".to_string(),
image_url: ImageUrlType {
url: base64_image_data.to_string(),
detail: None,
},
}),
]),
name: None,
},
])
.build()?;
let result = oai_client.chat().create(parameters).await;
match result {
Ok(chat_response) => {
// Extract plain text from whichever message variant came back;
// unknown variants fall through to an empty string.
let message = &chat_response.choices[0].message;
let response_text = match message {
ChatMessage::Assistant {
content: Some(chat_content),
..
} => chat_content.to_string(),
ChatMessage::User { content, .. } => content.to_string(),
ChatMessage::System { content, .. } => content.to_string(),
ChatMessage::Developer { content, .. } => content.to_string(),
ChatMessage::Tool { content, .. } => content.clone(),
_ => String::from(""),
};
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!(
"Generated image description: {response_text}"
)),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!(
"Generated image description: {response_text}"
)),
})
.await;
}
debug!(
"[*] {:?}: Got Image Description: {:?}",
self.agent.position(),
response_text
);
response_text
}
Err(err) => {
// Same swallow-and-return-empty policy as the Gemini arm.
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!(
"Error generating image description: {err}"
)),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(format!(
"Error generating image description: {err}"
)),
})
.await;
}
Default::default()
}
}
}
#[cfg(feature = "xai")]
ClientType::Xai(xai_client) => {
// NOTE(review): the image is never sent to xAI — only this fixed text
// prompt — so the response cannot reference the actual image; confirm.
let messages = vec![XaiMessage {
role: "user".into(),
content: "What is in this image?".to_string(),
}];
let rb = ChatCompletionsRequestBuilder::new(
xai_client.clone(),
"grok-beta".into(),
messages,
)
.temperature(0.0)
.stream(false);
let req = rb.clone().build()?;
let resp = rb.create_chat_completion(req).await;
match resp {
Ok(chat) => {
let response_text = chat.choices[0].message.content.clone();
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(
"Generated image description: ".to_string() + &response_text,
),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(
"Generated image description: ".to_string()
+ &response_text,
),
})
.await;
}
#[cfg(debug_assertions)]
debug!(
"[*] {:?}: Got XAI Output: {:?}",
self.agent.position(),
response_text
);
response_text
}
Err(err) => {
// Unlike the Gemini/OpenAI arms, an xAI failure propagates as an
// `Err` rather than an empty string.
let err_msg = format!("ERROR_MESSAGE: {err}");
self.agent.add_communication(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(err_msg.clone()),
});
#[cfg(feature = "mem")]
{
let _ = self
.save_ltm(Communication {
role: Cow::Borrowed("assistant"),
content: Cow::Owned(err_msg.clone()),
})
.await;
}
return Err(anyhow!(err_msg));
}
}
}
// Reached when no backend feature matched the configured client.
#[allow(unreachable_patterns)]
_ => {
return Err(anyhow!(
"No valid AI client configured. Enable `gem`, `oai`, `cld`, or `xai` feature."
));
}
};
Ok(response)
}
/// Compares the original task description with the text generated from the
/// produced image, returning `Ok(true)` when their similarity score meets
/// or exceeds the 0.8 threshold.
///
/// # Arguments
/// * `tasks` - task whose `description` is the reference prompt.
/// * `generated_text` - description generated back from the image.
pub async fn compare_text_and_image_prompts(
    &mut self,
    tasks: &mut Task,
    generated_text: &str,
) -> Result<bool> {
    // Acceptance threshold; `similarity` is a project helper — its scale is
    // assumed to be 0.0..=1.0, TODO confirm.
    let similarity_threshold = 0.8;
    // Collapse `if score >= t { return Ok(true); } Ok(false)` into a single
    // boolean expression.
    Ok(similarity(&tasks.description, generated_text) >= similarity_threshold)
}
}
#[async_trait]
impl Executor for DesignerGPT {
    /// Runs the designer workflow: logs the task's bullet points, then loops
    /// the agent's status machine — from `Idle` it generates an image for
    /// the task and marks the agent `Completed`; any other status is simply
    /// advanced to `Completed`.
    ///
    /// The `_execute`, `_browse`, and `_max_tries` parameters are accepted
    /// for trait compatibility but unused here.
    ///
    /// # Errors
    /// Propagates any error from `generate_image_from_text`.
    async fn execute<'a>(
        &'a mut self,
        tasks: &'a mut Task,
        _execute: bool,
        _browse: bool,
        _max_tries: u64,
    ) -> Result<()> {
        self.agent.update(Status::Idle);
        info!(
            "{}",
            format!("[*] {:?}: Executing task:", self.agent.position(),)
                .bright_white()
                .bold()
        );
        // Borrow the description directly; the previous
        // `tasks.clone().description.clone()` performed two full clones just
        // to split a string. The immutable borrow ends before `tasks` is
        // passed mutably below.
        for task in tasks.description.split("- ") {
            if !task.trim().is_empty() {
                info!("{} {}", "•".bright_white().bold(), task.trim().cyan());
            }
        }
        let mut _count = 0;
        while self.agent.status() != &Status::Completed {
            match self.agent.status() {
                Status::Idle => {
                    debug!("[*] {:?}: Idle", self.agent.position());
                    self.generate_image_from_text(tasks).await?;
                    _count += 1;
                    self.agent.update(Status::Completed);
                }
                _ => {
                    self.agent.update(Status::Completed);
                }
            }
        }
        Ok(())
    }
}