1use crate::agent_loop::{AgentEvent, AgentLoopConfig, KvCacheState, run_agent_loop};
2use crate::agents_md::AgentsMdRegistry;
3use crate::conversation::Conversation;
4use crate::error::AgentError;
5use crate::inference::{InferenceConfig, InferenceEngine, InferenceScheduler};
6use crate::permission::{PermissionMode, PermissionTracker};
7use crate::skills::SkillRegistry;
8use crate::tool::{Tool, ToolRegistry};
9use crate::tools;
10use llama_cpp_v3::LlamaContext;
11use std::path::PathBuf;
12use std::sync::Arc;
13
/// Fluent builder for constructing an [`Agent`].
///
/// Either a pre-loaded shared engine (`.engine()`) or a model path
/// (`.model_path()`) must be supplied before calling `.build()`.
pub struct AgentBuilder {
    // Pre-loaded engine shared with other agents; when set, the model-loading
    // fields below (backend, model_path, dll settings, …) are ignored by build().
    shared_engine: Option<Arc<InferenceEngine>>,

    // --- Model-loading configuration (used only when no shared engine) ---
    backend_type: llama_cpp_v3::backend::Backend,
    model_path: Option<String>,
    // Number of layers to offload to the GPU; 0 keeps everything on CPU.
    n_gpu_layers: i32,
    app_name: String,
    cache_dir: Option<PathBuf>,
    // Explicit path to the llama.cpp dynamic library, overriding discovery.
    explicit_dll_path: Option<PathBuf>,
    dll_version: Option<String>,
    // Custom chat template; None falls back to the model's built-in template.
    chat_template: Option<String>,

    // --- Agent behavior configuration ---
    system_prompt: String,
    // Context window size in tokens.
    n_ctx: u32,
    loop_config: AgentLoopConfig,
    permission_mode: PermissionMode,
    custom_tools: Vec<Box<dyn Tool>>,
    skip_builtin_tools: bool,
    enable_skills: bool,
    // Additional directories to search for skills beyond the defaults.
    extra_skills_paths: Vec<PathBuf>,
    // Skills to load by name; empty means "load all discovered skills".
    activated_skills: Vec<String>,
    enable_agents_md: bool,
    // Optional shared scheduler; when set, build() skips creating an owned context.
    scheduler: Option<Arc<InferenceScheduler>>,
}
82
83impl AgentBuilder {
84 pub fn new() -> Self {
85 Self {
86 shared_engine: None,
87 backend_type: llama_cpp_v3::backend::Backend::Cpu,
88 model_path: None,
89 n_gpu_layers: 0,
90 n_ctx: 8192,
91 app_name: "llama-cpp-v3-agent-sdk".to_string(),
92 cache_dir: None,
93 explicit_dll_path: None,
94 dll_version: None,
95 chat_template: None,
96 system_prompt: DEFAULT_SYSTEM_PROMPT.to_string(),
97 loop_config: AgentLoopConfig::default(),
98 permission_mode: PermissionMode::AutoApprove,
99 custom_tools: Vec::new(),
100 skip_builtin_tools: false,
101 enable_skills: true,
102 extra_skills_paths: Vec::new(),
103 activated_skills: Vec::new(),
104 enable_agents_md: true,
105 scheduler: None,
106 }
107 }
108
109 pub fn engine(mut self, engine: Arc<InferenceEngine>) -> Self {
117 self.shared_engine = Some(engine);
118 self
119 }
120
121 pub fn backend(mut self, backend: llama_cpp_v3::backend::Backend) -> Self {
125 self.backend_type = backend;
126 self
127 }
128
129 pub fn model_path(mut self, path: &str) -> Self {
131 self.model_path = Some(path.to_string());
132 self
133 }
134
135 pub fn n_gpu_layers(mut self, n: i32) -> Self {
137 self.n_gpu_layers = n;
138 self
139 }
140
141 pub fn app_name(mut self, name: &str) -> Self {
143 self.app_name = name.to_string();
144 self
145 }
146
147 pub fn cache_dir(mut self, dir: PathBuf) -> Self {
149 self.cache_dir = Some(dir);
150 self
151 }
152
153 pub fn explicit_dll_path(mut self, path: PathBuf) -> Self {
155 self.explicit_dll_path = Some(path);
156 self
157 }
158
159 pub fn dll_version(mut self, version: &str) -> Self {
161 self.dll_version = Some(version.to_string());
162 self
163 }
164
165 pub fn chat_template(mut self, template: &str) -> Self {
167 self.chat_template = Some(template.to_string());
168 self
169 }
170
171 pub fn system_prompt(mut self, prompt: &str) -> Self {
175 self.system_prompt = prompt.to_string();
176 self
177 }
178
179 pub fn n_ctx(mut self, n: u32) -> Self {
181 self.n_ctx = n;
182 self
183 }
184
185 pub fn max_iterations(mut self, n: usize) -> Self {
187 self.loop_config.max_iterations = n;
188 self
189 }
190
191 pub fn max_tokens_per_completion(mut self, n: usize) -> Self {
193 self.loop_config.max_tokens_per_completion = n;
194 self
195 }
196
197 pub fn temperature(mut self, temp: f32) -> Self {
199 self.loop_config.temperature = temp;
200 self
201 }
202
203 pub fn top_k(mut self, k: i32) -> Self {
205 self.loop_config.top_k = k;
206 self
207 }
208
209 pub fn min_p(mut self, p: f32) -> Self {
211 self.loop_config.min_p = p;
212 self
213 }
214
215 pub fn repeat_penalty(mut self, p: f32) -> Self {
217 self.loop_config.repeat_penalty = p;
218 self
219 }
220
221 pub fn stop_sequence(mut self, stop: &str) -> Self {
223 self.loop_config.stop_sequences.push(stop.to_string());
224 self
225 }
226
227 pub fn auto_approve(mut self) -> Self {
229 self.permission_mode = PermissionMode::AutoApprove;
230 self
231 }
232
233 pub fn permission_callback(
235 mut self,
236 cb: impl Fn(&crate::permission::PermissionRequest) -> crate::permission::PermissionDecision
237 + Send
238 + Sync
239 + 'static,
240 ) -> Self {
241 self.permission_mode = PermissionMode::Callback(Box::new(cb));
242 self
243 }
244
245 pub fn tool(mut self, tool: Box<dyn Tool>) -> Self {
247 self.custom_tools.push(tool);
248 self
249 }
250
251 pub fn skip_builtin_tools(mut self) -> Self {
253 self.skip_builtin_tools = true;
254 self
255 }
256
257 pub fn no_skills(mut self) -> Self {
259 self.enable_skills = false;
260 self
261 }
262
263 pub fn skills_path(mut self, path: PathBuf) -> Self {
265 self.extra_skills_paths.push(path);
266 self
267 }
268
269 pub fn activate_skill(mut self, name: &str) -> Self {
271 self.activated_skills.push(name.to_string());
272 self
273 }
274
275 pub fn no_agents_md(mut self) -> Self {
277 self.enable_agents_md = false;
278 self
279 }
280
281 pub fn scheduler(mut self, scheduler: Arc<InferenceScheduler>) -> Self {
287 self.scheduler = Some(scheduler);
288 self
289 }
290
291 pub fn build(self) -> Result<Agent, AgentError> {
296 let engine = if let Some(engine) = self.shared_engine {
298 engine
299 } else {
300 let model_path = self
301 .model_path
302 .ok_or_else(|| AgentError::Other(
303 "No model path specified. Use .model_path() or .engine().".to_string(),
304 ))?;
305
306 let config = InferenceConfig {
307 backend: self.backend_type,
308 model_path,
309 n_gpu_layers: self.n_gpu_layers,
310 n_ctx: self.n_ctx,
311 app_name: self.app_name,
312 explicit_dll_path: self.explicit_dll_path,
313 dll_version: self.dll_version,
314 cache_dir: self.cache_dir,
315 chat_template: self.chat_template,
316 };
317
318 Arc::new(InferenceEngine::load(config)?)
319 };
320
321 let ctx = if self.scheduler.is_some() {
325 None
326 } else {
327 Some(engine.create_context(Some(self.n_ctx))?)
328 };
329
330 let mut tool_registry = ToolRegistry::new();
332 if !self.skip_builtin_tools {
333 tools::register_builtin_tools(&mut tool_registry);
334 }
335 for tool in self.custom_tools {
336 tool_registry.register(tool);
337 }
338
339 let mut skill_registry = SkillRegistry::new();
341 if self.enable_skills {
342 skill_registry.add_default_paths();
343 for path in &self.extra_skills_paths {
344 skill_registry.add_search_path(path.clone());
345 }
346 skill_registry.discover();
347
348 if self.activated_skills.is_empty() {
349 skill_registry.load_all();
350 } else {
351 for name in &self.activated_skills {
352 skill_registry.load(name);
353 }
354 }
355 }
356
357 let mut agents_md_registry = AgentsMdRegistry::new();
359 if self.enable_agents_md {
360 agents_md_registry.discover();
361 }
362
363 let tools_prompt = tool_registry.tools_prompt();
365 let skills_prompt = if self.enable_skills {
366 let summary = skill_registry.skills_summary_prompt();
367 let loaded = skill_registry.loaded_skills_prompt();
368 if summary.is_empty() && loaded.is_empty() {
369 String::new()
370 } else {
371 format!("{}\n{}", summary, loaded)
372 }
373 } else {
374 String::new()
375 };
376 let agents_md_prompt = agents_md_registry.agents_md_prompt();
377
378 let mut full_system_prompt = self.system_prompt.clone();
379 if !agents_md_prompt.is_empty() {
380 full_system_prompt.push_str("\n\n");
381 full_system_prompt.push_str(&agents_md_prompt);
382 }
383 if !skills_prompt.is_empty() {
384 full_system_prompt.push_str("\n\n");
385 full_system_prompt.push_str(&skills_prompt);
386 }
387 full_system_prompt.push_str("\n\n");
388 full_system_prompt.push_str(&tools_prompt);
389
390 let conversation = Conversation::with_system_prompt(&full_system_prompt);
391
392 Ok(Agent {
393 engine,
394 ctx,
395 conversation,
396 tool_registry,
397 permissions: PermissionTracker::new(self.permission_mode),
398 loop_config: self.loop_config,
399 skill_registry,
400 agents_md_registry,
401 scheduler: self.scheduler,
402 kv_cache: KvCacheState::new(),
403 })
404 }
405}
406
407impl Default for AgentBuilder {
408 fn default() -> Self {
409 Self::new()
410 }
411}
412
/// A conversational agent: an inference engine plus conversation state,
/// tool/skill/AGENTS.md registries, and permission tracking.
///
/// Constructed via [`AgentBuilder`] (see [`Agent::builder`]).
pub struct Agent {
    engine: Arc<InferenceEngine>,
    // Owned inference context; None when a scheduler supplies pooled
    // contexts per chat turn instead.
    ctx: Option<LlamaContext>,
    conversation: Conversation,
    tool_registry: ToolRegistry,
    permissions: PermissionTracker,
    loop_config: AgentLoopConfig,
    skill_registry: SkillRegistry,
    agents_md_registry: AgentsMdRegistry,
    // Optional shared scheduler used to acquire a context for each turn.
    scheduler: Option<Arc<InferenceScheduler>>,
    // Tracks KV-cache validity across turns; invalidated when history is cleared.
    kv_cache: KvCacheState,
}
430
431impl Agent {
432 pub fn builder() -> AgentBuilder {
434 AgentBuilder::new()
435 }
436
437 pub fn chat(
442 &mut self,
443 user_message: &str,
444 on_event: impl FnMut(AgentEvent),
445 ) -> Result<(), AgentError> {
446 self.conversation.add_user(user_message);
447
448 let mut permit = self.scheduler.as_ref().map(|s| s.acquire());
451
452 let ctx = if let Some(p) = &mut permit {
454 p.context_mut()
455 .or(self.ctx.as_mut())
456 .ok_or_else(|| AgentError::Other("No context available for inference (no pool and no owned context)".to_string()))?
457 } else {
458 self.ctx.as_mut().ok_or_else(|| AgentError::Other("Agent has no owned context and no scheduler was provided".to_string()))?
459 };
460
461 run_agent_loop(
462 &self.engine,
463 ctx,
464 &mut self.conversation,
465 &self.tool_registry,
466 &mut self.permissions,
467 &self.loop_config,
468 &mut self.kv_cache,
469 on_event,
470 )
471 }
472
473 pub fn chat_simple(&mut self, user_message: &str) -> Result<String, AgentError> {
478 let mut response = String::new();
479
480 self.chat(user_message, |event| match event {
481 AgentEvent::TextDelta(text) => response.push_str(&text),
482 _ => {}
483 })?;
484
485 Ok(response)
486 }
487
488 pub fn engine(&self) -> &Arc<InferenceEngine> {
490 &self.engine
491 }
492
493 pub fn conversation(&self) -> &Conversation {
495 &self.conversation
496 }
497
498 pub fn conversation_mut(&mut self) -> &mut Conversation {
500 &mut self.conversation
501 }
502
503 pub fn tools(&self) -> &ToolRegistry {
505 &self.tool_registry
506 }
507
508 pub fn register_tool(&mut self, tool: Box<dyn Tool>) {
510 self.tool_registry.register(tool);
511 }
512
513 pub fn skills(&self) -> &SkillRegistry {
515 &self.skill_registry
516 }
517
518 pub fn agents_md(&self) -> &AgentsMdRegistry {
520 &self.agents_md_registry
521 }
522
523 pub fn clear_history(&mut self) {
525 let msgs = self.conversation.messages().to_vec();
527 self.conversation.clear();
528 if let Some(sys) = msgs.first() {
529 if sys.role == crate::conversation::Role::System {
530 self.conversation.add_system(&sys.content);
531 }
532 }
533 self.kv_cache.invalidate();
535 }
536}
537
/// Default system prompt used by [`AgentBuilder::new`]; callers can replace
/// it via [`AgentBuilder::system_prompt`]. Tool/skill/AGENTS.md sections are
/// appended to it during `build()`.
const DEFAULT_SYSTEM_PROMPT: &str = "\
You are a helpful AI coding assistant. You can interact with the user's codebase \
and system using the tools available to you.

When the user asks you to perform a task:
1. Think through the steps needed
2. Use tools to gather information and make changes
3. Verify your work when appropriate
4. Explain what you did

Be precise and careful with file edits. Always verify file contents before editing.";