use crate::agent_loop::{AgentEvent, AgentLoopConfig, KvCacheState, run_agent_loop};
use crate::agents_md::AgentsMdRegistry;
use crate::conversation::Conversation;
use crate::error::AgentError;
use crate::inference::{InferenceConfig, InferenceEngine, InferenceScheduler};
use crate::permission::{PermissionMode, PermissionTracker};
use crate::skills::SkillRegistry;
use crate::tool::{Tool, ToolRegistry};
use crate::tools;
use llama_cpp_v3::LlamaContext;
use std::path::PathBuf;
use std::sync::Arc;
/// Fluent builder for [`Agent`].
///
/// Configure either a pre-loaded engine (`shared_engine`) or a model path to
/// load in `build()`; all other fields have sensible defaults (see `new()`).
pub struct AgentBuilder {
    // Already-loaded engine to reuse; when `None`, `build()` loads a model
    // from `model_path` with the settings below.
    shared_engine: Option<Arc<InferenceEngine>>,
    // Compute backend used only when loading a model ourselves.
    backend_type: llama_cpp_v3::backend::Backend,
    // GGUF model file path; required unless `shared_engine` is set.
    model_path: Option<String>,
    // Number of layers to offload to the GPU (0 = CPU only).
    n_gpu_layers: i32,
    // Application name forwarded to the inference config.
    app_name: String,
    // Optional cache directory for the engine loader.
    cache_dir: Option<PathBuf>,
    // Explicit path to the backend DLL/shared library, bypassing discovery.
    explicit_dll_path: Option<PathBuf>,
    // Pinned backend library version, if any.
    dll_version: Option<String>,
    // Chat template override; engine/model default when `None`.
    chat_template: Option<String>,
    // Base system prompt; AGENTS.md, skills and tools sections are appended.
    system_prompt: String,
    // Context window size in tokens.
    n_ctx: u32,
    // Sampling/iteration settings for the agent loop.
    loop_config: AgentLoopConfig,
    // How tool-use permission requests are resolved.
    permission_mode: PermissionMode,
    // User-supplied tools registered in addition to (or instead of) built-ins.
    custom_tools: Vec<Box<dyn Tool>>,
    // When true, built-in tools are not registered.
    skip_builtin_tools: bool,
    // When false, skill discovery/loading is skipped entirely.
    enable_skills: bool,
    // Extra directories searched for skills beyond the defaults.
    extra_skills_paths: Vec<PathBuf>,
    // Names of skills to load; empty means "load all discovered".
    activated_skills: Vec<String>,
    // When false, AGENTS.md discovery is skipped.
    enable_agents_md: bool,
    // Optional shared scheduler; when set the agent borrows pooled contexts
    // instead of owning one.
    scheduler: Option<Arc<InferenceScheduler>>,
}
impl AgentBuilder {
    /// Creates a builder with defaults: CPU backend, 8192-token context,
    /// auto-approved permissions, built-in tools registered, skills and
    /// AGENTS.md discovery enabled.
    pub fn new() -> Self {
        Self {
            shared_engine: None,
            backend_type: llama_cpp_v3::backend::Backend::Cpu,
            model_path: None,
            n_gpu_layers: 0,
            n_ctx: 8192,
            app_name: "llama-cpp-v3-agent-sdk".to_string(),
            cache_dir: None,
            explicit_dll_path: None,
            dll_version: None,
            chat_template: None,
            system_prompt: DEFAULT_SYSTEM_PROMPT.to_string(),
            loop_config: AgentLoopConfig::default(),
            permission_mode: PermissionMode::AutoApprove,
            custom_tools: Vec::new(),
            skip_builtin_tools: false,
            enable_skills: true,
            extra_skills_paths: Vec::new(),
            activated_skills: Vec::new(),
            enable_agents_md: true,
            scheduler: None,
        }
    }

    /// Reuses an already-loaded engine; `build()` then skips model loading.
    pub fn engine(mut self, engine: Arc<InferenceEngine>) -> Self {
        self.shared_engine = Some(engine);
        self
    }

    /// Sets the compute backend used when loading a model.
    pub fn backend(mut self, backend: llama_cpp_v3::backend::Backend) -> Self {
        self.backend_type = backend;
        self
    }

    /// Sets the GGUF model path (required unless `.engine()` is used).
    pub fn model_path(mut self, path: &str) -> Self {
        self.model_path = Some(path.to_string());
        self
    }

    /// Number of layers to offload to the GPU (0 = CPU only).
    pub fn n_gpu_layers(mut self, n: i32) -> Self {
        self.n_gpu_layers = n;
        self
    }

    /// Application name forwarded to the inference config.
    pub fn app_name(mut self, name: &str) -> Self {
        self.app_name = name.to_string();
        self
    }

    /// Cache directory for the engine loader.
    pub fn cache_dir(mut self, dir: PathBuf) -> Self {
        self.cache_dir = Some(dir);
        self
    }

    /// Explicit backend library path, bypassing discovery.
    pub fn explicit_dll_path(mut self, path: PathBuf) -> Self {
        self.explicit_dll_path = Some(path);
        self
    }

    /// Pins the backend library version.
    pub fn dll_version(mut self, version: &str) -> Self {
        self.dll_version = Some(version.to_string());
        self
    }

    /// Overrides the chat template (model default when unset).
    pub fn chat_template(mut self, template: &str) -> Self {
        self.chat_template = Some(template.to_string());
        self
    }

    /// Replaces the base system prompt (tool/skill sections are still appended).
    pub fn system_prompt(mut self, prompt: &str) -> Self {
        self.system_prompt = prompt.to_string();
        self
    }

    /// Context window size in tokens.
    pub fn n_ctx(mut self, n: u32) -> Self {
        self.n_ctx = n;
        self
    }

    /// Maximum tool-use iterations per `chat()` turn.
    pub fn max_iterations(mut self, n: usize) -> Self {
        self.loop_config.max_iterations = n;
        self
    }

    /// Token cap for each model completion.
    pub fn max_tokens_per_completion(mut self, n: usize) -> Self {
        self.loop_config.max_tokens_per_completion = n;
        self
    }

    /// Sampling temperature.
    pub fn temperature(mut self, temp: f32) -> Self {
        self.loop_config.temperature = temp;
        self
    }

    /// Top-k sampling cutoff.
    pub fn top_k(mut self, k: i32) -> Self {
        self.loop_config.top_k = k;
        self
    }

    /// Min-p sampling cutoff.
    pub fn min_p(mut self, p: f32) -> Self {
        self.loop_config.min_p = p;
        self
    }

    /// Repetition penalty.
    pub fn repeat_penalty(mut self, p: f32) -> Self {
        self.loop_config.repeat_penalty = p;
        self
    }

    /// Adds a stop sequence (may be called multiple times).
    pub fn stop_sequence(mut self, stop: &str) -> Self {
        self.loop_config.stop_sequences.push(stop.to_string());
        self
    }

    /// Auto-approves all tool permission requests (the default).
    pub fn auto_approve(mut self) -> Self {
        self.permission_mode = PermissionMode::AutoApprove;
        self
    }

    /// Routes tool permission requests through a caller-supplied callback.
    pub fn permission_callback(
        mut self,
        cb: impl Fn(&crate::permission::PermissionRequest) -> crate::permission::PermissionDecision
            + Send
            + Sync
            + 'static,
    ) -> Self {
        self.permission_mode = PermissionMode::Callback(Box::new(cb));
        self
    }

    /// Registers a custom tool (in addition to built-ins unless skipped).
    pub fn tool(mut self, tool: Box<dyn Tool>) -> Self {
        self.custom_tools.push(tool);
        self
    }

    /// Disables registration of the built-in tool set.
    pub fn skip_builtin_tools(mut self) -> Self {
        self.skip_builtin_tools = true;
        self
    }

    /// Disables skill discovery and loading entirely.
    pub fn no_skills(mut self) -> Self {
        self.enable_skills = false;
        self
    }

    /// Adds an extra directory to search for skills.
    pub fn skills_path(mut self, path: PathBuf) -> Self {
        self.extra_skills_paths.push(path);
        self
    }

    /// Loads only the named skill(s) instead of all discovered skills.
    pub fn activate_skill(mut self, name: &str) -> Self {
        self.activated_skills.push(name.to_string());
        self
    }

    /// Disables AGENTS.md discovery.
    pub fn no_agents_md(mut self) -> Self {
        self.enable_agents_md = false;
        self
    }

    /// Shares an inference scheduler; the agent borrows pooled contexts
    /// instead of creating an owned one.
    pub fn scheduler(mut self, scheduler: Arc<InferenceScheduler>) -> Self {
        self.scheduler = Some(scheduler);
        self
    }

    /// Consumes the builder and produces a ready-to-chat [`Agent`].
    ///
    /// # Errors
    /// Returns [`AgentError::Other`] when neither `.engine()` nor
    /// `.model_path()` was provided, and propagates engine-load or
    /// context-creation failures.
    pub fn build(self) -> Result<Agent, AgentError> {
        // Prefer a caller-supplied engine; otherwise load a model here.
        let engine = if let Some(engine) = self.shared_engine {
            engine
        } else {
            let model_path = self.model_path.ok_or_else(|| {
                AgentError::Other(
                    "No model path specified. Use .model_path() or .engine().".to_string(),
                )
            })?;
            let config = InferenceConfig {
                backend: self.backend_type,
                model_path,
                n_gpu_layers: self.n_gpu_layers,
                n_ctx: self.n_ctx,
                app_name: self.app_name,
                explicit_dll_path: self.explicit_dll_path,
                dll_version: self.dll_version,
                cache_dir: self.cache_dir,
                chat_template: self.chat_template,
            };
            Arc::new(InferenceEngine::load(config)?)
        };

        // With a scheduler the agent borrows pooled contexts per turn;
        // without one it owns a dedicated context.
        let ctx = if self.scheduler.is_some() {
            None
        } else {
            Some(engine.create_context(Some(self.n_ctx))?)
        };

        let mut tool_registry = ToolRegistry::new();
        if !self.skip_builtin_tools {
            tools::register_builtin_tools(&mut tool_registry);
        }
        for tool in self.custom_tools {
            tool_registry.register(tool);
        }

        let mut skill_registry = SkillRegistry::new();
        if self.enable_skills {
            skill_registry.add_default_paths();
            for path in &self.extra_skills_paths {
                skill_registry.add_search_path(path.clone());
            }
            skill_registry.discover();
            // No explicit activation list means "load everything discovered".
            if self.activated_skills.is_empty() {
                skill_registry.load_all();
            } else {
                for name in &self.activated_skills {
                    skill_registry.load(name);
                }
            }
        }

        let mut agents_md_registry = AgentsMdRegistry::new();
        if self.enable_agents_md {
            agents_md_registry.discover();
        }

        let tools_prompt = tool_registry.tools_prompt();
        let skills_prompt = if self.enable_skills {
            let summary = skill_registry.skills_summary_prompt();
            let loaded = skill_registry.loaded_skills_prompt();
            // Join only the non-empty sections so a lone section never
            // carries a stray leading/trailing newline.
            match (summary.is_empty(), loaded.is_empty()) {
                (true, true) => String::new(),
                (false, true) => summary,
                (true, false) => loaded,
                (false, false) => format!("{}\n{}", summary, loaded),
            }
        } else {
            String::new()
        };
        let agents_md_prompt = agents_md_registry.agents_md_prompt();

        // `self` is consumed, so the prompt can be moved instead of cloned.
        let mut full_system_prompt = self.system_prompt;
        if !agents_md_prompt.is_empty() {
            full_system_prompt.push_str("\n\n");
            full_system_prompt.push_str(&agents_md_prompt);
        }
        if !skills_prompt.is_empty() {
            full_system_prompt.push_str("\n\n");
            full_system_prompt.push_str(&skills_prompt);
        }
        // Skip the separator when there are no tools at all
        // (skip_builtin_tools + no custom tools) to avoid trailing blank lines.
        if !tools_prompt.is_empty() {
            full_system_prompt.push_str("\n\n");
            full_system_prompt.push_str(&tools_prompt);
        }

        let conversation = Conversation::with_system_prompt(&full_system_prompt);
        Ok(Agent {
            engine,
            ctx,
            conversation,
            tool_registry,
            permissions: PermissionTracker::new(self.permission_mode),
            loop_config: self.loop_config,
            skill_registry,
            agents_md_registry,
            scheduler: self.scheduler,
            kv_cache: KvCacheState::new(),
        })
    }
}
impl Default for AgentBuilder {
fn default() -> Self {
Self::new()
}
}
/// A conversational agent: an inference engine plus conversation state,
/// tool/skill registries, permission tracking, and loop configuration.
/// Construct via [`Agent::builder`].
pub struct Agent {
    // Shared inference engine (model + backend).
    engine: Arc<InferenceEngine>,
    // Owned context; `None` when a scheduler supplies pooled contexts.
    ctx: Option<LlamaContext>,
    // Full message history, including the composed system prompt.
    conversation: Conversation,
    // Tools exposed to the model.
    tool_registry: ToolRegistry,
    // Resolves tool-use permission requests.
    permissions: PermissionTracker,
    // Sampling/iteration settings for the agent loop.
    loop_config: AgentLoopConfig,
    // Discovered/loaded skills.
    skill_registry: SkillRegistry,
    // Discovered AGENTS.md files.
    agents_md_registry: AgentsMdRegistry,
    // Optional shared scheduler for pooled contexts.
    scheduler: Option<Arc<InferenceScheduler>>,
    // KV-cache bookkeeping carried across turns.
    kv_cache: KvCacheState,
}
impl Agent {
    /// Starts a fresh [`AgentBuilder`].
    pub fn builder() -> AgentBuilder {
        AgentBuilder::new()
    }

    /// Runs one user turn through the agent loop, streaming [`AgentEvent`]s
    /// to `on_event`.
    ///
    /// # Errors
    /// Fails when no inference context is available (neither an owned
    /// context nor a pooled one from the scheduler), or when the agent
    /// loop itself errors.
    pub fn chat(
        &mut self,
        user_message: &str,
        on_event: impl FnMut(AgentEvent),
    ) -> Result<(), AgentError> {
        self.conversation.add_user(user_message);
        // Hold a scheduler permit (if any) for the duration of the turn so a
        // pooled context stays ours while the loop runs.
        let mut permit = self.scheduler.as_ref().map(|s| s.acquire());
        let ctx = match &mut permit {
            // Prefer the permit's pooled context, falling back to our own.
            Some(p) => p.context_mut().or(self.ctx.as_mut()).ok_or_else(|| {
                AgentError::Other(
                    "No context available for inference (no pool and no owned context)"
                        .to_string(),
                )
            })?,
            None => self.ctx.as_mut().ok_or_else(|| {
                AgentError::Other(
                    "Agent has no owned context and no scheduler was provided".to_string(),
                )
            })?,
        };
        run_agent_loop(
            &self.engine,
            ctx,
            &mut self.conversation,
            &self.tool_registry,
            &mut self.permissions,
            &self.loop_config,
            &mut self.kv_cache,
            on_event,
        )
    }

    /// Like [`Agent::chat`], but collects only the streamed text deltas and
    /// returns them as a single string.
    pub fn chat_simple(&mut self, user_message: &str) -> Result<String, AgentError> {
        let mut response = String::new();
        self.chat(user_message, |event| {
            if let AgentEvent::TextDelta(text) = event {
                response.push_str(&text);
            }
        })?;
        Ok(response)
    }

    /// The underlying inference engine.
    pub fn engine(&self) -> &Arc<InferenceEngine> {
        &self.engine
    }

    /// Read-only access to the conversation history.
    pub fn conversation(&self) -> &Conversation {
        &self.conversation
    }

    /// Mutable access to the conversation history.
    pub fn conversation_mut(&mut self) -> &mut Conversation {
        &mut self.conversation
    }

    /// Read-only access to the registered tools.
    pub fn tools(&self) -> &ToolRegistry {
        &self.tool_registry
    }

    /// Registers an additional tool after construction.
    pub fn register_tool(&mut self, tool: Box<dyn Tool>) {
        self.tool_registry.register(tool);
    }

    /// Read-only access to the skill registry.
    pub fn skills(&self) -> &SkillRegistry {
        &self.skill_registry
    }

    /// Read-only access to the AGENTS.md registry.
    pub fn agents_md(&self) -> &AgentsMdRegistry {
        &self.agents_md_registry
    }

    /// Clears the conversation, keeping the leading system prompt (when
    /// present), and invalidates the KV cache.
    pub fn clear_history(&mut self) {
        // Clone only the system message's content, not the entire history
        // (the previous `to_vec()` copied every message just to read one).
        let system = match self.conversation.messages().first() {
            Some(m) if m.role == crate::conversation::Role::System => Some(m.content.clone()),
            _ => None,
        };
        self.conversation.clear();
        if let Some(sys) = system {
            self.conversation.add_system(&sys);
        }
        self.kv_cache.invalidate();
    }
}
/// Default system prompt used when the caller does not override it via
/// `AgentBuilder::system_prompt`. The trailing `\` on a line continues the
/// string without inserting a newline; other line breaks are literal.
const DEFAULT_SYSTEM_PROMPT: &str = "\
You are a helpful AI coding assistant. You can interact with the user's codebase \
and system using the tools available to you.
When the user asks you to perform a task:
1. Think through the steps needed
2. Use tools to gather information and make changes
3. Verify your work when appropriate
4. Explain what you did
Be precise and careful with file edits. Always verify file contents before editing.";