oxios_kernel/
agent_runtime.rs

1//! Agent runtime: wraps oxi-sdk's Agent for Seed execution.
2//!
3//! The AgentRuntime uses `OxiosEngine.oxi().agent()` (AgentBuilder pattern)
4//! to construct agents with full middleware, observability, and security
5//! integration from oxi-sdk 0.24.0.
6//!
7//! # Architecture
8//!
9//! All tool access goes through `KernelHandle` — the single syscall-table-like
10//! path for agent OS control. The runtime:
11//!
12//! 1. Resolves the agent's CSpace from persona/role/hint
13//! 2. Registers tools via `register_tools_from_cspace()`
14//! 3. Optionally queries `ToolRetriever` for semantic capability hints
15//! 4. Builds an `Agent` via `AgentBuilder` with middleware pipeline
16//! 5. Runs via `Agent::run_streaming()` for real-time event processing
17//!
18//! # oxi-sdk 0.23.0 Integration
19//!
20//! Uses `AgentBuilder` for agent construction with:
21//! - `.with_rate_limit()` — tool call rate limiting
22//! - `.with_token_budget()` — per-execution token caps
23//! - `.tracer()` / `.cost_tracker()` — observability hooks
//!
//! ## Routing integration (RFC-011)
25//!
26//! Model usage events (`AgentEvent::Usage`) are recorded to the shared
27//! `RoutingStats` so the Web dashboard can display per-model call counts
28//! and estimated costs.
29
30use anyhow::Result;
31use oxi_sdk::observability::AuditTrail;
32use oxi_sdk::{
33    Agent, AgentConfig, AgentEvent, CompactionEvent, CompactionStrategy, ProviderResolver,
34};
35use oxi_sdk::{SearchCache, ToolExecutionMode, ToolRegistry};
36use parking_lot::Mutex;
37use std::collections::HashMap;
38use std::sync::Arc;
39// RFC-014 Phase D: `ToolRegistry::register_arc` is used in the AgentBuilder
40// path to attach CSpace tools after `builder.build()` returns.
41
42use crate::access_manager::{AccessGate, AgentContext, TracingAuditSink, TrailAuditSink};
43use crate::capability::resolve::resolve_cspace;
44use crate::engine::OxiosEngine;
45use crate::memory::{MemoryEntry, MemoryManager, MemoryType};
46use crate::persona::PersonaManager;
47use crate::tools::registration::register_tools_from_cspace_gated;
48
49use crate::KernelHandle;
50use crate::event_bus::KernelEvent;
51use crate::session_context::SessionContext;
52use crate::types::AgentId;
53use oxios_ouroboros::{ExecutionResult, Seed};
54
55/// Global LLM circuit breaker instance — delegates to oxi-sdk's ProviderCircuitBreaker.
56static LLM_CIRCUIT_BREAKER: std::sync::OnceLock<oxi_sdk::ProviderCircuitBreaker> =
57    std::sync::OnceLock::new();
58
59/// Get the global LLM circuit breaker.
60fn get_llm_circuit_breaker() -> &'static oxi_sdk::ProviderCircuitBreaker {
61    LLM_CIRCUIT_BREAKER.get_or_init(|| {
62        oxi_sdk::ProviderCircuitBreaker::new(
63            "global".to_string(),
64            oxi_sdk::CircuitBreakerConfig::default(),
65        )
66    })
67}
68
/// Configuration for creating AgentRuntime instances.
///
/// A copy of this struct is stored on every [`AgentRuntime`] and is read when
/// the agent for a Seed is assembled. Numeric limits use `0` to mean
/// "disabled / unlimited".
#[derive(Debug, Clone)]
pub struct AgentRuntimeConfig {
    /// Model ID in `provider/model` format (e.g. `anthropic/claude-sonnet-4-20250514`).
    /// Resolved through the engine before each run.
    pub model_id: String,
    /// How to execute tool calls within a single turn.
    pub tool_execution: ToolExecutionMode,
    /// Whether auto-retry is enabled for retryable LLM errors.
    pub auto_retry_enabled: bool,
    /// Bound project paths. The first entry is used as the agent workspace
    /// and is forwarded as the agent config's `workspace_dir`.
    pub project_paths: Vec<std::path::PathBuf>,
    /// Scratch workspace directory for temp files. Used as the agent
    /// workspace only when `project_paths` is empty.
    pub workspace_dir: Option<std::path::PathBuf>,
    /// API key resolved from CredentialStore at build time.
    pub api_key: Option<String>,
    /// Per-provider options for fine-grained control.
    pub provider_options: Option<oxi_sdk::ProviderOptions>,
    /// Rate limit for tool calls (requests per minute). 0 = unlimited.
    pub rate_limit_per_minute: usize,
    /// Token budget per agent execution. 0 = unlimited.
    pub token_budget: usize,
    /// Enable audit logging for all tool executions.
    pub audit_tool_calls: bool,
    /// Provider-level RPM for rate-limited provider pool. 0 = no pooling.
    /// When set, uses `OxiosEngine::pooled_provider()` instead of `create_provider()`.
    pub provider_rpm: u32,
}
96
97impl Default for AgentRuntimeConfig {
98    fn default() -> Self {
99        Self {
100            model_id: String::new(),
101            tool_execution: ToolExecutionMode::Parallel,
102            auto_retry_enabled: true,
103            project_paths: Vec::new(),
104            workspace_dir: None,
105            api_key: None,
106            provider_options: None,
107            rate_limit_per_minute: 0,
108            token_budget: 0,
109            audit_tool_calls: false,
110            provider_rpm: 0,
111        }
112    }
113}
114
115/// Mutable state shared between the event callback and the main execute flow.
116#[derive(Default)]
117struct ExecuteState {
118    final_content: String,
119    steps_completed: usize,
120    success: bool,
121    /// Collected trajectory steps for SONA learning (RFC-020 Phase 2).
122    /// Ordered by insertion — parallel tools get their final position
123    /// resolved when they complete, preserving approximate execution order.
124    trajectory_steps: Vec<oxios_memory::memory::sona::TrajectoryStep>,
125    /// Map of tool_call_id → (start instant, index into trajectory_steps).
126    /// Used to correlate ToolExecutionEnd with the correct step when
127    /// parallel tool calls complete out of order.
128    pending_tools: std::collections::HashMap<String, (std::time::Instant, usize)>,
129    /// Ordered tool_call_ids matching trajectory_steps indices.
130    /// Pushed in ToolExecutionStart, same order as trajectory_steps.
131    tool_call_ids: Vec<String>,
132    /// Per-step tool args (JSON string) captured from ToolExecutionStart.
133    tool_args_map: std::collections::HashMap<String, String>,
134    /// Per-step error flag from ToolExecutionEnd.
135    tool_error_map: std::collections::HashMap<String, bool>,
136    /// Per-step start timestamp (UTC) from ToolExecutionStart.
137    tool_timestamps: std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
138    /// Cumulative input tokens from AgentEvent::Usage.
139    total_input_tokens: u64,
140    /// Cumulative output tokens from AgentEvent::Usage.
141    total_output_tokens: u64,
142}
143
/// Runtime that wraps an oxi-sdk `Agent` for executing Seeds.
///
/// Each call to [`AgentRuntime::execute`] creates a fresh `Agent`,
/// builds a ToolRegistry based on the agent's CSpace, and runs it to completion.
///
/// All OS-level access goes through `KernelHandle` — the single syscall table
/// for agent control. Provider/model resolution goes through `EngineHandle`,
/// which returns the latest `OxiosEngine` (hot-swapped on config change).
///
/// Optional collaborators (persona manager, tool retriever, persistence hook)
/// start as `None` and are attached with the `with_*` builder methods.
pub struct AgentRuntime {
    /// Handle used to fetch the current engine (`engine_handle.get()`) for
    /// model resolution at the start of each run.
    engine_handle: Arc<crate::engine::EngineHandle>,
    /// Runtime configuration (model, limits, workspace paths).
    config: AgentRuntimeConfig,
    /// Single path to all kernel services.
    kernel_handle: Arc<KernelHandle>,
    /// Persona manager for system prompt injection.
    persona_manager: Option<Arc<PersonaManager>>,
    /// Semantic tool retriever for capability discovery.
    tool_retriever: Option<Arc<crate::tools::retrieval::ToolRetriever>>,
    /// Shared routing stats (shared with EngineApi).
    routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
    /// Autonomous persistence hook (RFC-016).
    persistence_hook: Option<Arc<crate::persistence_hook::PersistenceHook>>,
    /// Per-session assistant message index counter (RFC-016).
    /// Keyed by session id; runs without a session id share the empty-string key.
    session_msg_counter: Arc<Mutex<HashMap<String, usize>>>,
}
168
169impl AgentRuntime {
170    /// Creates a new agent runtime with engine handle and kernel access.
171    ///
172    /// Provider/model resolution goes through `engine_handle` (hot-swapped on config change).
173    /// Tool access goes through `kernel_handle`.
174    pub fn new(
175        engine_handle: Arc<crate::engine::EngineHandle>,
176        model_id: impl Into<String>,
177        kernel_handle: Arc<KernelHandle>,
178        routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
179    ) -> Self {
180        Self {
181            engine_handle,
182            config: AgentRuntimeConfig {
183                model_id: model_id.into(),
184                ..Default::default()
185            },
186            kernel_handle,
187            persona_manager: None,
188            tool_retriever: None,
189            routing_stats,
190            persistence_hook: None,
191            session_msg_counter: Arc::new(Mutex::new(HashMap::new())),
192        }
193    }
194
195    /// Attach a PersonaManager for persona system prompt injection.
196    pub fn with_persona_manager(mut self, pm: Arc<PersonaManager>) -> Self {
197        self.persona_manager = Some(pm);
198        self
199    }
200
201    /// Set the runtime config (overrides defaults).
202    pub fn with_config(mut self, config: AgentRuntimeConfig) -> Self {
203        self.config = config;
204        self
205    }
206
207    /// Attach a ToolRetriever for semantic capability discovery.
208    pub fn with_tool_retriever(
209        mut self,
210        retriever: Arc<crate::tools::retrieval::ToolRetriever>,
211    ) -> Self {
212        self.tool_retriever = Some(retriever);
213        self
214    }
215
216    /// Attach a PersistenceHook for autonomous persistence (RFC-016).
217    pub fn with_persistence_hook(
218        mut self,
219        hook: Arc<crate::persistence_hook::PersistenceHook>,
220    ) -> Self {
221        self.persistence_hook = Some(hook);
222        self
223    }
224
225    /// Execute a Seed by running the tool-calling agent to completion.
226    ///
227    /// 1. Resolves CSpace from persona/role/hint
228    /// 2. Registers tools via CSpace
229    /// 3. Recalls memories if available
230    /// 4. Creates Agent via `Agent::new_with_resolver()`
231    /// 5. Runs via `Agent::run_streaming()`
232    pub async fn execute(
233        &self,
234        agent_id: AgentId,
235        seed: &Seed,
236        session_ctx: &mut SessionContext,
237    ) -> Result<ExecutionResult> {
238        // RFC-015: session_id is derived from seed.id for chat transparency
239        // event publishing. Most callers run one Seed per session turn, so
240        // seed.id is a usable session identifier.
241        let session_id: Option<String> = Some(seed.id.to_string());
242        self.execute_with_session(agent_id, seed, session_ctx, session_id)
243            .await
244    }
245
246    /// Like [`execute`](Self::execute) but with an explicit session_id for
247    /// RFC-015 chat transparency event publishing.
248    pub async fn execute_with_session(
249        &self,
250        agent_id: AgentId,
251        seed: &Seed,
252        session_ctx: &mut SessionContext,
253        session_id: Option<String>,
254    ) -> Result<ExecutionResult> {
255        let prompt = build_user_prompt(seed);
256
257        // Get active persona system prompt.
258        let persona_prompt = self
259            .persona_manager
260            .as_ref()
261            .map(|pm| pm.active_system_prompt())
262            .filter(|s| !s.trim().is_empty());
263
264        // Determine persona role for CSpace resolution.
265        let persona_role = self
266            .persona_manager
267            .as_ref()
268            .and_then(|pm| pm.get_active_persona().map(|p| p.role.clone()));
269
270        // Resolve CSpace from persona role, seed hint, or default.
271        let cspace = resolve_cspace(
272            seed.cspace_hint.as_deref(),
273            persona_role.as_deref(),
274            Some("worker"),
275            agent_id,
276        );
277
278        // Build system prompt (without SKILL.md injection — capabilities are
279        // surfaced through the CSpace tool set + semantic retrieval instead).
280        let mut system_prompt = build_system_prompt(seed, persona_prompt.as_deref(), None, None);
281
282        // Semantic capability retrieval: find tools relevant to this seed's goal.
283        let capabilities_xml = if let Some(ref retriever) = self.tool_retriever {
284            match retriever.embedder().embed(&seed.goal).await {
285                Ok(query_vec) => {
286                    let results = retriever.retrieve(&query_vec, 8);
287                    if results.is_empty() {
288                        None
289                    } else {
290                        let xml = crate::tools::retrieval::format_capability_index(&results);
291                        tracing::info!(count = results.len(), "Retrieved relevant capabilities");
292                        Some(xml)
293                    }
294                }
295                Err(e) => {
296                    tracing::warn!(error = %e, "Failed to embed seed goal for retrieval");
297                    None
298                }
299            }
300        } else {
301            None
302        };
303
304        // Build kernel manifest from CSpace active domains.
305        let kernel_manifest = {
306            let domains = cspace.active_domains();
307            if domains.is_empty() {
308                None
309            } else {
310                Some(crate::tools::retrieval::build_kernel_manifest(&domains))
311            }
312        };
313
314        // Rebuild system prompt with capabilities and manifest if available.
315        if capabilities_xml.is_some() || kernel_manifest.is_some() {
316            system_prompt = build_system_prompt(
317                seed,
318                persona_prompt.as_deref(),
319                capabilities_xml.as_deref(),
320                kernel_manifest.as_deref(),
321            );
322        }
323
324        // Blend relevant memories into system prompt.
325        let memory_manager = self.kernel_handle.agents.memory_manager();
326        match memory_manager
327            .recall_with_proactive(&seed.goal, &mut session_ctx.recall_timing)
328            .await
329        {
330            Ok(memories) if !memories.is_empty() => {
331                tracing::info!(count = memories.len(), "Recalled memories for seed");
332                system_prompt = memory_manager.blend_into_prompt(&memories, &system_prompt);
333            }
334            Ok(_) => tracing::debug!("No memories recalled"),
335            Err(e) => tracing::warn!(error = %e, "Failed to recall memories"),
336        }
337
338        // Inject learned strategy from SONA (RFC-020 Phase 2).
339        if let Some(sona) = memory_manager.sona_engine() {
340            match sona.adapt(&seed.goal).await {
341                Ok(Some(pattern)) if pattern.confidence > 0.5 => {
342                    tracing::info!(
343                        domain = %pattern.domain,
344                        confidence = pattern.confidence,
345                        "SONA learned pattern injected"
346                    );
347                    system_prompt.push_str(&format!(
348                        "\n\n## Learned Strategy (confidence: {:.0}%)\n{}\n",
349                        pattern.confidence * 100.0,
350                        pattern.strategy,
351                    ));
352                }
353                Ok(_) => tracing::debug!("No high-confidence SONA pattern found"),
354                Err(e) => tracing::debug!(error = %e, "SONA adapt failed (non-fatal)"),
355            }
356        }
357
358        // Blend relevant knowledge notes into system prompt (KnowledgeLens, RFC-003 Phase 3).
359        match self
360            .kernel_handle
361            .knowledge_lens
362            .recall_for_context(&seed.goal, 5)
363            .await
364        {
365            Ok(ctx) if !ctx.notes.is_empty() => {
366                tracing::info!(
367                    notes = ctx.notes.len(),
368                    memories = ctx.memories.len(),
369                    "Recalled knowledge context for seed"
370                );
371                let knowledge_blend = ctx
372                    .notes
373                    .iter()
374                    .take(3)
375                    .map(|n| format!("## {}\n\n{}", n.name, n.content))
376                    .collect::<Vec<_>>()
377                    .join("\n\n");
378                system_prompt.push_str("\n\n## Relevant Knowledge\n\n");
379                system_prompt.push_str(&knowledge_blend);
380            }
381            Ok(_) => tracing::debug!("No knowledge recalled"),
382            Err(e) => tracing::warn!(error = %e, "Failed to recall knowledge context"),
383        }
384
385        // Resolve model from engine (provider resolution happens inside AgentBuilder).
386        // Get the latest engine — may have been hot-swapped via Web UI config change.
387        let engine = self.engine_handle.get();
388        let _model = engine.resolve_model(&self.config.model_id)?;
389        let seed_id = seed.id;
390
391        // Build the agent.
392        let config = self.config.clone();
393        let kernel_handle = Arc::clone(&self.kernel_handle);
394
395        // Extract audit trail from kernel for TrailAuditSink wiring.
396        let audit_trail: Option<Arc<AuditTrail>> =
397            Some(Arc::clone(&self.kernel_handle.security.audit_trail));
398
399        let (
400            mut final_content,
401            steps_completed,
402            success,
403            trajectory_steps,
404            agent,
405            tool_call_ids,
406            tool_args_map,
407            tool_error_map,
408            tool_timestamps,
409            total_input_tokens,
410            total_output_tokens,
411        ) = {
412            run_agent(
413                &config,
414                &engine,
415                kernel_handle,
416                system_prompt,
417                prompt,
418                seed_id,
419                seed.goal.clone(),
420                agent_id,
421                cspace,
422                audit_trail,
423                self.routing_stats.clone(),
424                session_id.clone(),
425            )
426            .await?
427        };
428
429        // ── Post-execution: safety net for empty final content ──
430        //
431        // oxi 0.32.0 removed max_iterations — the loop now exits naturally
432        // when the LLM produces a text-only response (pi-agent behavior).
433        // This block is kept as a safety net in case the LLM returns empty
434        // text despite a natural exit (rare, but possible).
435        if final_content.is_empty() && !trajectory_steps.is_empty() {
436            let tool_summary: Vec<String> = trajectory_steps
437                .iter()
438                .enumerate()
439                .map(|(i, step)| {
440                    let truncated = if step.output.len() > 800 {
441                        format!("{}...", &step.output[..800])
442                    } else {
443                        step.output.clone()
444                    };
445                    format!("{}. [{}] {}", i + 1, step.input, truncated)
446                })
447                .collect();
448            let summary_prompt = format!(
449                "도구 실행 결과:\n\n{}\n\n\
450                 위 결과를 바탕으로 사용자의 요청에 대해 자연스럽게 한국어로 답변해주세요. \
451                 도구의 원시 출력을 그대로 복사하지 말고, 의미 있는 내용만 정리해서 전달하세요.",
452                tool_summary.join("\n")
453            );
454            match agent.run(summary_prompt).await {
455                Ok((response, _events)) => {
456                    if !response.content.is_empty() {
457                        tracing::info!(seed_id = %seed_id, "Post-execution summary generated");
458                        final_content = response.content;
459                    }
460                }
461                Err(e) => {
462                    tracing::warn!(error = %e, "Post-execution summary failed");
463                }
464            }
465        }
466
467        // Map trajectory steps to tool call records for the execution result.
468        // tool_call_ids[i] corresponds to trajectory_steps[i].
469        let tool_calls: Vec<oxios_ouroboros::ToolCallRecord> = trajectory_steps
470            .iter()
471            .enumerate()
472            .map(|(i, step)| {
473                let tc_id = tool_call_ids.get(i).cloned().unwrap_or_default();
474                let args_str = tool_call_ids
475                    .get(i)
476                    .and_then(|id| tool_args_map.get(id))
477                    .cloned()
478                    .unwrap_or_default();
479                let is_error = tool_call_ids
480                    .get(i)
481                    .and_then(|id| tool_error_map.get(id))
482                    .copied()
483                    .unwrap_or(false);
484                let timestamp = tool_call_ids
485                    .get(i)
486                    .and_then(|id| tool_timestamps.get(id))
487                    .copied();
488                let input_str = truncate_json_str(&args_str, 500);
489                oxios_ouroboros::ToolCallRecord {
490                    tool: step.input.clone(),
491                    input: input_str,
492                    output: step.output.clone(),
493                    duration_ms: step.duration_ms,
494                    is_error,
495                    tool_call_id: tc_id,
496                    timestamp,
497                }
498            })
499            .collect();
500
501        tracing::info!(
502            seed_id = %seed_id,
503            steps = steps_completed,
504            success,
505            tool_calls = tool_calls.len(),
506            "AgentRuntime finished"
507        );
508
509        let result = ExecutionResult {
510            output: final_content.clone(),
511            steps_completed,
512            success,
513            tool_calls,
514            tokens_input: total_input_tokens,
515            tokens_output: total_output_tokens,
516            model_id: self.engine_handle.get().default_model_id().to_string(),
517        };
518
519        // RFC-016: Autonomous persistence hook.
520        // Runs after successful execution, fire-and-forget.
521        if success && let Some(hook) = &self.persistence_hook {
522            let already_saved_knowledge = trajectory_steps
523                .iter()
524                .any(|s| s.input == "knowledge" && s.output.contains("written successfully"));
525            let hook = hook.clone();
526            let seed_clone = seed.clone();
527            let traj_clone = trajectory_steps.clone();
528            let output_clone = final_content.clone();
529            let sid = session_id.clone();
530            // Compute the assistant message index for this execution.
531            // Increment per-session counter, then use the pre-increment value.
532            let msg_index = {
533                let mut counter = self.session_msg_counter.lock();
534                let idx = counter.entry(sid.clone().unwrap_or_default()).or_insert(0);
535                let current = *idx;
536                *idx += 1;
537                current
538            };
539            tokio::spawn(async move {
540                match hook
541                    .evaluate(
542                        &seed_clone,
543                        &traj_clone,
544                        &output_clone,
545                        already_saved_knowledge,
546                    )
547                    .await
548                {
549                    Ok(plan) => {
550                        if !plan.memory.is_empty() || !plan.knowledge.is_empty() {
551                            tracing::info!(
552                                memory = plan.memory.len(),
553                                knowledge = plan.knowledge.len(),
554                                message_index = msg_index,
555                                "PersistenceHook executing plan"
556                            );
557                            let session_id = sid.unwrap_or_default();
558                            hook.execute_plan(plan, &session_id, msg_index).await;
559                        }
560                    }
561                    Err(e) => tracing::warn!(error = %e, "PersistenceHook evaluate failed"),
562                }
563            });
564        }
565
566        Ok(result)
567    }
568}
569
570/// Create and run an oxi-sdk `Agent` with CSpace-based tool registration.
571///
572/// Uses `engine.oxi().agent()` (AgentBuilder) for full middleware,
573/// observability, and security integration from oxi-sdk 0.23.0.
574#[allow(clippy::too_many_arguments)]
575async fn run_agent(
576    config: &AgentRuntimeConfig,
577    engine: &OxiosEngine,
578    kernel_handle: Arc<KernelHandle>,
579    system_prompt: String,
580    prompt: String,
581    seed_id: uuid::Uuid,
582    seed_goal: String,
583    agent_id: AgentId,
584    cspace: crate::capability::CSpace,
585    audit_trail: Option<Arc<AuditTrail>>,
586    routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
587    session_id: Option<String>,
588) -> Result<(
589    String,
590    usize,
591    bool,
592    Vec<oxios_memory::memory::sona::TrajectoryStep>,
593    Arc<Agent>,
594    Vec<String>,
595    std::collections::HashMap<String, String>,
596    std::collections::HashMap<String, bool>,
597    std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
598    u64,
599    u64,
600)> {
601    // Extract workspace.
602    let workspace = if !config.project_paths.is_empty() {
603        config.project_paths[0].clone()
604    } else if let Some(ref ws) = config.workspace_dir {
605        ws.clone()
606    } else {
607        std::env::temp_dir()
608            .join("oxios-agent-workspace")
609            .join(agent_id.to_string())
610    };
611
612    // Ensure workspace exists.
613    let _ = std::fs::create_dir_all(&workspace);
614
615    tracing::debug!(workspace = %workspace.display(), "Agent workspace scoped");
616
617    // Ensure all paths the agent might access are in allowed_paths.
618    //
619    // AgentLifecycleManager::ensure_permissions() adds kernel.workspace (~/.oxios/workspace),
620    // but the agent operates in different directories depending on context:
621    //
622    //   1. CWD -- oxi-sdk tools (ReadTool, LsTool, etc.) resolve relative paths
623    //      against std::env::current_dir(), which is the daemon working directory.
624    //      Without CWD in allowed_paths, EVERY file tool call is denied.
625    //   2. The designated workspace -- computed from project_paths / workspace_dir / temp.
626    //   3. Kernel workspace -- state store path for seeds, sessions, etc.
627    //   4. /tmp -- general temp file access.
628    //
629    // All four must be in allowed_paths before GatedTool wraps any tool.
630    {
631        use crate::access_manager::{Role, Subject};
632        let agent_name = format!("agent-{agent_id}");
633        let mut am = kernel_handle.exec.access_manager().lock();
634        let perms = am.get_or_create_permissions(&agent_name);
635
636        // 1. CWD -- critical: oxi-sdk resolves relative paths here
637        if let Ok(cwd) = std::env::current_dir() {
638            let cwd_pattern = format!("{}/**", cwd.to_string_lossy().trim_end_matches('/'));
639            if !perms.allowed_paths.iter().any(|p| p == &cwd_pattern) {
640                perms.allow_path(&cwd_pattern);
641                tracing::debug!(
642                    agent = %agent_name,
643                    path = %cwd_pattern,
644                    "Added CWD to agent allowed paths"
645                );
646            }
647        }
648
649        // 2. Designated workspace
650        let ws_pattern = format!("{}/**", workspace.to_string_lossy().trim_end_matches('/'));
651        if !perms.allowed_paths.iter().any(|p| p == &ws_pattern) {
652            perms.allow_path(&ws_pattern);
653        }
654
655        // 3. Kernel workspace (state store path)
656        let kernel_ws = kernel_handle
657            .state
658            .workspace_path()
659            .to_string_lossy()
660            .to_string();
661        let kernel_ws_pattern = format!("{}/**", kernel_ws.trim_end_matches('/'));
662        if kernel_ws_pattern != ws_pattern
663            && !perms.allowed_paths.iter().any(|p| p == &kernel_ws_pattern)
664        {
665            perms.allow_path(&kernel_ws_pattern);
666        }
667
668        // 4. /tmp -- for general temp file access
669        if !perms.allowed_paths.iter().any(|p| p == "/tmp/**") {
670            perms.allow_path("/tmp/**");
671        }
672
673        // Ensure RBAC Superuser role so AccessGate Layer 1 passes.
674        let rbac_subject = Subject::Agent(agent_id);
675        am.rbac_manager_mut()
676            .assign_role(rbac_subject, Role::Superuser);
677    }
678
679    // Start distributed trace span for this agent execution.
680    let _trace_guard = crate::observability::tracer().start(
681        format!("seed-{}", &seed_id.to_string()[..8]).as_str(),
682        oxi_sdk::SpanKind::Agent,
683    );
684
685    // ── Register tools based on CSpace (with access gate) ──
686    let registry = ToolRegistry::new();
687    let search_cache = Arc::new(SearchCache::new());
688
689    // Build agent context for security
690    let agent_context = AgentContext {
691        agent_id,
692        agent_name: format!("agent-{agent_id}"),
693        cspace: Arc::new(cspace.clone()),
694    };
695
696    // Build audit sink: TrailAuditSink (Merkle chain + JSONL) when audit_trail
697    // is available, otherwise fall back to TracingAuditSink.
698    let audit_sink: Arc<dyn crate::access_manager::AuditSink> = if let Some(trail) = audit_trail {
699        let audit_path = kernel_handle
700            .state
701            .workspace_path()
702            .join("audit")
703            .join("access.jsonl");
704        Arc::new(TrailAuditSink::new(trail, audit_path))
705    } else {
706        Arc::new(TracingAuditSink)
707    };
708
709    // Build access gate from kernel's security infrastructure
710    let access_gate = Arc::new(AccessGate::new(
711        kernel_handle.exec.access_manager().clone(),
712        Arc::new(kernel_handle.exec.config_snapshot()),
713        audit_sink,
714    ));
715
716    register_tools_from_cspace_gated(
717        &registry,
718        &kernel_handle,
719        &cspace,
720        search_cache,
721        agent_id,
722        access_gate,
723        agent_context,
724    );
725
726    tracing::info!(
727        seed_id = %seed_id,
728        capabilities = cspace.len(),
729        "Tools registered from CSpace"
730    );
731
732    // ── Build AgentConfig ──
733    //
734    // RFC-014 Phase D: `system_prompt` is also passed to the new
735    // `AgentBuilder::system_prompt()` (which overrides the value embedded
736    // in `AgentConfig` at build time). We clone here so the builder path
737    // can consume the value while the legacy `Agent::new_with_resolver`
738    // path still sees it in the config.
739    let agent_config = AgentConfig {
740        name: format!("agent-{agent_id}"),
741        description: None,
742        model_id: config.model_id.clone(),
743        system_prompt: Some(system_prompt.clone()),
744        timeout_seconds: 300,
745        temperature: Some(0.7),
746        max_tokens: Some(8192),
747        compaction_strategy: CompactionStrategy::Threshold(0.8),
748        compaction_instruction: None,
749        context_window: 128_000,
750        api_key: config.api_key.clone(),
751        workspace_dir: config.project_paths.first().cloned(),
752        output_mode: None,
753        provider_options: config.provider_options.clone(),
754    };
755
756    // ── Build Agent (RFC-014 Phase D) ──
757    //
758    // Two paths:
759    //   1. `provider_rpm == 0` (common): use oxi-sdk 0.26.2's new
760    //      `AgentBuilder` API. The builder unifies model resolution, provider
761    //      creation, and (optionally) middleware wiring. Engine-level
762    //      `authorizer` / `tracer` / `cost_tracker` are propagated through
763    //      the new builder methods.
764    //   2. `provider_rpm > 0` (rare): keep the legacy
765    //      `Agent::new_with_resolver` + `set_hooks` path because the
766    //      AgentBuilder does not expose a way to inject a pre-built
767    //      `ProviderPool` for rate-limited access. This is a deliberate
768    //      scope-limit per RFC-014/phase-d-agentbuilder.md §2 "Provider
769    //      선택 로직은 보존".
770    let agent = if config.provider_rpm > 0 {
771        // ── Legacy path: rate-limited provider pool ──
772        let resolver: Arc<dyn ProviderResolver> = Arc::new(engine.oxi().clone());
773        let provider_name = engine.resolve_model(&config.model_id)?.provider;
774        let provider = engine.pooled_provider(&provider_name, config.provider_rpm)?;
775
776        // Build middleware pipeline.
777        let mut pipeline = oxi_sdk::MiddlewarePipeline::new();
778        if config.rate_limit_per_minute > 0 {
779            pipeline = pipeline.push(oxi_sdk::middleware::builtins::RateLimitMiddleware::new(
780                config.rate_limit_per_minute,
781            ));
782        }
783        if config.token_budget > 0 {
784            pipeline = pipeline.push(oxi_sdk::middleware::builtins::TokenBudgetMiddleware::new(
785                config.token_budget,
786            ));
787        }
788        if config.audit_tool_calls {
789            pipeline = pipeline.push(oxi_sdk::middleware::builtins::LoggingMiddleware::new(
790                tracing::Level::INFO,
791            ));
792        }
793
794        // Create Agent with CSpace tool registry and provider resolver.
795        let agent = Arc::new(Agent::new_with_resolver(
796            provider,
797            agent_config,
798            Arc::new(registry),
799            resolver,
800        ));
801
802        // Wire middleware pipeline → AgentHooks.
803        if !pipeline.is_empty() {
804            let terminate_flag = Arc::new(std::sync::atomic::AtomicBool::new(false));
805            let agent_id_for_hooks = agent_id.to_string();
806            let hooks = oxi_sdk::middleware::build_hooks(
807                Arc::new(pipeline),
808                agent_id_for_hooks,
809                terminate_flag,
810            );
811            agent.set_hooks(hooks);
812        }
813
814        agent
815    } else {
816        // ── New path: AgentBuilder (RFC-014 Phase D) ──
817        let mut builder = engine
818            .oxi()
819            .agent(agent_config)
820            .workspace(&workspace)
821            .system_prompt(system_prompt);
822
823        // CSpace-based tool registration is oxios-specific and is preserved.
824        //
825        // The builder's `.tool()` method takes `impl AgentTool + 'static`
826        // (a concrete value), but oxios' CSpace tools are `Arc<dyn AgentTool>`.
827        // The SDK does not expose a way to inject a pre-built `ToolRegistry`
828        // into the builder, so we register them on the agent's tool registry
829        // after `build()` returns. This keeps CSpace semantics intact.
830        //
831        // We capture the tool names now and apply them once the agent exists.
832        let cspace_tool_arcs: Vec<Arc<dyn oxi_sdk::AgentTool>> = registry
833            .names()
834            .into_iter()
835            .filter_map(|name| registry.get(&name))
836            .collect();
837
838        // Engine-level observability/security → AgentBuilder (new API).
839        if let Some(auth) = engine.authorizer() {
840            builder = builder.authorizer(auth.clone());
841        }
842        if let Some(tracer) = engine.tracer() {
843            builder = builder.tracer(tracer.clone());
844        }
845        if let Some(ct) = engine.cost_tracker() {
846            builder = builder.cost_tracker(ct.clone());
847        }
848
849        // Middleware: AgentBuilder convenience helpers replace the manual
850        // `MiddlewarePipeline` + `build_hooks()` + `set_hooks()` triple.
851        if config.rate_limit_per_minute > 0 {
852            builder = builder.with_rate_limit(config.rate_limit_per_minute);
853        }
854        if config.token_budget > 0 {
855            builder = builder.with_token_budget(config.token_budget);
856        }
857        if config.audit_tool_calls {
858            builder = builder.with_logging();
859        }
860
861        let built = builder.build()?;
862        let agent = Arc::new(built);
863
864        // Attach CSpace tools to the agent's tool registry.
865        // `Agent::tools()` returns the same `Arc<ToolRegistry>` that
866        // `AgentBuilder` populated, so `register_arc` is the canonical
867        // extension point for `Arc<dyn AgentTool>` values.
868        let agent_tools = agent.tools();
869        for tool in cspace_tool_arcs {
870            agent_tools.register_arc(tool);
871        }
872
873        agent
874    };
875
876    // Shared mutable state for the event callback.
877    let exec_state = Arc::new(Mutex::new(ExecuteState::default()));
878    let exec_state_cb = Arc::clone(&exec_state);
879    let memory_for_callback: Arc<MemoryManager> = (*kernel_handle.agents.memory_manager()).clone();
880    let session_id_for_callback = seed_id.to_string();
881    let model_id_for_callback = config.model_id.clone();
882    let agent_id_for_callback = agent_id.to_string();
883    let routing_stats_for_cb = routing_stats.clone();
884    // RFC-015: real-time event publishing for chat transparency.
885    // Falls back to None when the caller did not opt in.
886    let transparency_session: Option<String> = session_id.clone();
887    let kernel_handle_for_cb: Arc<KernelHandle> = Arc::clone(&kernel_handle);
888
889    // Run the agent with streaming events.
890    let result = agent
891        .run_streaming(prompt, move |event| {
892            let mut s = exec_state_cb.lock();
893            match event {
894                AgentEvent::ToolExecutionStart {
895                    tool_name,
896                    tool_call_id,
897                    args,
898                    context,
899                    ..
900                } => {
901                    // Record start time and push a placeholder step.
902                    let idx = s.trajectory_steps.len();
903                    s.pending_tools
904                        .insert(tool_call_id.clone(), (std::time::Instant::now(), idx));
905                    s.tool_args_map.insert(
906                        tool_call_id.clone(),
907                        serde_json::to_string(&args).unwrap_or_default(),
908                    );
909                    s.tool_timestamps
910                        .insert(tool_call_id.clone(), chrono::Utc::now());
911                    s.tool_call_ids.push(tool_call_id.clone());
912                    s.trajectory_steps
913                        .push(oxios_memory::memory::sona::TrajectoryStep {
914                            input: tool_name.clone(),
915                            output: String::new(),
916                            duration_ms: 0,
917                            confidence: 0.0,
918                        });
919                    // RFC-015: broadcast tool start so Web UI can show progress.
920                    if let Some(ref sid) = transparency_session {
921                        let context_json = context
922                            .as_ref()
923                            .map(serde_json::to_value)
924                            .transpose()
925                            .unwrap_or(None);
926                        let _ =
927                            kernel_handle_for_cb
928                                .infra
929                                .publish(KernelEvent::ToolExecutionStarted {
930                                    session_id: sid.clone(),
931                                    tool_name: tool_name.clone(),
932                                    tool_call_id: tool_call_id.clone(),
933                                    tool_args: args.clone(),
934                                    context: context_json,
935                                });
936                    }
937                }
938                AgentEvent::ToolExecutionUpdate {
939                    tool_call_id,
940                    tool_name,
941                    partial_result,
942                    tab_id,
943                    context,
944                } => {
945                    // RFC-015: forward real-time progress to the event bus
946                    // so the Web UI can show a spinner and progress text
947                    // while the tool is still executing. Best-effort —
948                    // publish failures (e.g. lagged subscribers) are ignored.
949                    //
950                    // `tab_id` and `context` come from oxi-agent 0.29+
951                    // (ToolCallContext: PageVisit, WebSearch, etc.).
952                    // Older agent versions won't send these — they default
953                    // to None and the UI gracefully ignores them.
954                    if let Some(ref sid) = transparency_session {
955                        let context_json = context
956                            .as_ref()
957                            .map(serde_json::to_value)
958                            .transpose()
959                            .unwrap_or(None);
960                        let _ = kernel_handle_for_cb.infra.publish(
961                            KernelEvent::ToolExecutionProgress {
962                                session_id: sid.clone(),
963                                tool_call_id: tool_call_id.clone(),
964                                tool_name: tool_name.clone(),
965                                progress: partial_result,
966                                tab_id,
967                                context: context_json,
968                            },
969                        );
970                    }
971                }
972                AgentEvent::ToolExecutionEnd {
973                    tool_name,
974                    tool_call_id,
975                    is_error,
976                    result,
977                    ..
978                } => {
979                    if !is_error {
980                        s.steps_completed += 1;
981                    }
982                    // Look up the exact step by tool_call_id.
983                    let mut duration_ms: u64 = 0;
984                    let mut summary = String::new();
985                    if let Some((start, idx)) = s.pending_tools.remove(tool_call_id.as_str()) {
986                        duration_ms = start.elapsed().as_millis() as u64;
987                        if let Some(step) = s.trajectory_steps.get_mut(idx) {
988                            summary = summarize_tool_result(&result.content, 200);
989                            step.output = summary.clone();
990                            step.duration_ms = duration_ms;
991                            step.confidence = if is_error { 0.3 } else { 0.8 };
992                        }
993                    }
994                    s.tool_error_map.insert(tool_call_id.clone(), is_error);
995                    // RFC-015: broadcast tool completion.
996                    if let Some(ref sid) = transparency_session {
997                        let _ = kernel_handle_for_cb.infra.publish(
998                            KernelEvent::ToolExecutionFinished {
999                                session_id: sid.clone(),
1000                                tool_call_id: tool_call_id.clone(),
1001                                tool_name: tool_name.clone(),
1002                                duration_ms,
1003                                is_error,
1004                                output_summary: summary,
1005                            },
1006                        );
1007                    }
1008                }
1009                AgentEvent::AgentEnd {
1010                    messages,
1011                    stop_reason,
1012                    ..
1013                } => {
1014                    if let Some(oxi_sdk::Message::Assistant(a)) = messages.last() {
1015                        s.final_content = a.text_content();
1016                    }
1017                    // oxi 0.32.0: loop exits naturally when LLM produces text-only
1018                    // response (StopReason::Stop). Error/Aborted = failure.
1019                    // ToolUse should not occur at AgentEnd in 0.32.0 (the loop
1020                    // continues until text-only), but treat it as non-failure
1021                    // since tool calls were executed successfully.
1022                    s.success = matches!(stop_reason.as_deref(), Some("Stop") | Some("ToolUse"));
1023                }
1024                AgentEvent::Error { message, .. } => {
1025                    s.final_content = message.clone();
1026                    s.success = false;
1027                }
1028                AgentEvent::Usage {
1029                    input_tokens,
1030                    output_tokens,
1031                } => {
1032                    // Accumulate totals for ExecutionResult.
1033                    s.total_input_tokens += input_tokens as u64;
1034                    s.total_output_tokens += output_tokens as u64;
1035
1036                    // Record token usage to cost tracker (existing).
1037                    let agent_label = format!("agent-{agent_id_for_callback}");
1038                    crate::observability::cost_tracker().record(
1039                        &agent_label,
1040                        &oxi_sdk::Model::new(
1041                            &model_id_for_callback,
1042                            &model_id_for_callback,
1043                            oxi_sdk::Api::OpenAiCompletions,
1044                            "unknown",
1045                            "https://unknown.com",
1046                        ),
1047                        oxi_sdk::TokenUsage {
1048                            input: input_tokens as u64,
1049                            output: output_tokens as u64,
1050                            cache_read: 0,
1051                            cache_write: 0,
1052                        },
1053                    );
1054
1055                    // Record to routing stats (RFC-011).
1056                    if let Some(stats) = &routing_stats_for_cb {
1057                        let cost = crate::kernel_handle::engine_api::estimate_cost(
1058                            &model_id_for_callback,
1059                            input_tokens as u64,
1060                            output_tokens as u64,
1061                        );
1062                        stats.record_model_usage(&model_id_for_callback, cost);
1063                    }
1064                    // RFC-015: publish cumulative token usage.
1065                    if let Some(ref sid) = transparency_session {
1066                        let _ = kernel_handle_for_cb
1067                            .infra
1068                            .publish(KernelEvent::TokenUsageUpdate {
1069                                session_id: sid.clone(),
1070                                input_tokens: input_tokens as u64,
1071                                output_tokens: output_tokens as u64,
1072                            });
1073                    }
1074                }
1075                AgentEvent::Compaction {
1076                    event: CompactionEvent::Completed { result, .. },
1077                } => {
1078                    handle_compaction(
1079                        result.summary.clone(),
1080                        session_id_for_callback.clone(),
1081                        memory_for_callback.clone(),
1082                    );
1083                    // RFC-015: compaction is a form of reasoning — expose it.
1084                    if let Some(ref sid) = transparency_session {
1085                        let _ =
1086                            kernel_handle_for_cb
1087                                .infra
1088                                .publish(KernelEvent::ReasoningFragment {
1089                                    session_id: sid.clone(),
1090                                    content: result.summary.clone(),
1091                                    source: "compaction".to_string(),
1092                                });
1093                    }
1094                }
1095                _ => {}
1096            }
1097        })
1098        .await;
1099
1100    // Record circuit breaker result after agent execution.
1101    let circuit = get_llm_circuit_breaker();
1102    if result.is_err() {
1103        circuit.record_failure();
1104        crate::metrics::get_metrics()
1105            .llm_circuit_breaker_state
1106            .set(1.0);
1107    } else {
1108        circuit.record_success();
1109        crate::metrics::get_metrics()
1110            .llm_circuit_breaker_state
1111            .set(0.0);
1112    }
1113
1114    if let Err(e) = result {
1115        tracing::error!(seed_id = %seed_id, error = %e, "Agent failed");
1116        let s = exec_state.lock();
1117        return Ok((
1118            format!("Agent failed: {e}"),
1119            s.steps_completed,
1120            false,
1121            s.trajectory_steps.clone(),
1122            agent,
1123            s.tool_call_ids.clone(),
1124            s.tool_args_map.clone(),
1125            s.tool_error_map.clone(),
1126            s.tool_timestamps.clone(),
1127            s.total_input_tokens,
1128            s.total_output_tokens,
1129        ));
1130    }
1131
1132    let s = exec_state.lock();
1133    tracing::info!(
1134        seed_id = %seed_id,
1135        steps = s.steps_completed,
1136        success = s.success,
1137        "Agent completed"
1138    );
1139
1140    // Record trajectory to SONA learning engine (RFC-020 Phase 2).
1141    // Fire-and-forget: don't block the result on learning.
1142    if !s.trajectory_steps.is_empty()
1143        && let Some(sona) = kernel_handle.agents.memory_manager().sona_engine()
1144    {
1145        let steps = s.trajectory_steps.clone();
1146        let success = s.success;
1147        let sona = Arc::clone(sona);
1148        let domain = infer_domain(&seed_goal);
1149        tokio::spawn(async move {
1150            let verdict = if success {
1151                oxios_memory::memory::sona::Verdict::Success
1152            } else {
1153                oxios_memory::memory::sona::Verdict::Failure
1154            };
1155            let trajectory = oxios_memory::memory::sona::Trajectory::new(steps, verdict, &domain);
1156            if let Err(e) = sona.record(trajectory).await {
1157                tracing::debug!(error = %e, "SONA trajectory recording failed (non-fatal)");
1158            }
1159        });
1160    }
1161
1162    Ok((
1163        s.final_content.clone(),
1164        s.steps_completed,
1165        s.success,
1166        s.trajectory_steps.clone(),
1167        agent,
1168        s.tool_call_ids.clone(),
1169        s.tool_args_map.clone(),
1170        s.tool_error_map.clone(),
1171        s.tool_timestamps.clone(),
1172        s.total_input_tokens,
1173        s.total_output_tokens,
1174    ))
1175}
1176
/// Summarize a tool result so it fits within `max_len` characters.
///
/// The input is trimmed first. If the trimmed text already fits it is
/// returned whole; otherwise only its first line is considered. A first line
/// that still does not fit is cut and suffixed with `"..."` so that the
/// total is exactly `max_len` characters.
///
/// Lengths are counted in `char`s, never bytes, so multi-byte UTF-8 text
/// (e.g., Korean, CJK, emoji) is never split inside a code point.
///
/// When `max_len` is smaller than the ellipsis itself (fewer than 3), the
/// ellipsis is dropped and the first `max_len` characters are returned, so
/// the limit is still honored and the subtraction cannot underflow.
fn summarize_tool_result(result: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    let trimmed = result.trim();
    if trimmed.chars().count() <= max_len {
        return trimmed.to_string();
    }

    // `trimmed` is non-empty here (an empty string always fits), so a first
    // line exists; the fallback only keeps the expression total.
    let first_line = trimmed.lines().next().unwrap_or("");
    if first_line.chars().count() <= max_len {
        return first_line.to_string();
    }

    match max_len.checked_sub(ELLIPSIS.len()) {
        // Room for the ellipsis: keep `max_len - 3` chars and append it.
        Some(keep) => {
            let mut summary: String = first_line.chars().take(keep).collect();
            summary.push_str(ELLIPSIS);
            summary
        }
        // Budget too small for an ellipsis: hard cut at `max_len` chars.
        None => first_line.chars().take(max_len).collect(),
    }
}
1195
/// Truncate a JSON string representation to at most `max_len` characters
/// for storage in tool call records.
///
/// Returns the input unchanged when it already fits. Otherwise keeps the
/// first `max_len - 3` characters and appends `"..."`.
///
/// Both the fit check and the cut are measured in `char`s. Checking the byte
/// length instead would treat a short multi-byte string as too long and
/// append an ellipsis even though nothing was removed.
///
/// When `max_len` is smaller than the ellipsis (fewer than 3), the first
/// `max_len` characters are returned without an ellipsis so the subtraction
/// cannot underflow and the limit is still respected.
fn truncate_json_str(json_str: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    // Byte length is an upper bound on the char count, so it is a cheap
    // fast path; `nth(max_len)` then only walks `max_len + 1` chars instead
    // of counting the whole string.
    if json_str.len() <= max_len || json_str.chars().nth(max_len).is_none() {
        return json_str.to_string();
    }

    match max_len.checked_sub(ELLIPSIS.len()) {
        Some(keep) => {
            let mut truncated: String = json_str.chars().take(keep).collect();
            truncated.push_str(ELLIPSIS);
            truncated
        }
        None => json_str.chars().take(max_len).collect(),
    }
}
1206
/// Infer a coarse domain label from a seed goal for SONA trajectory grouping.
///
/// The goal is lower-cased and its first eight whitespace-separated words
/// are compared against a fixed keyword table. Punctuation surrounding a
/// word is stripped before the comparison, so `"Deploy:"` or `"bug."` still
/// match their keywords. Categories are checked in table order and the
/// first category with a hit wins.
///
/// When no keyword matches, the label is built from the first two words of
/// the whole goal that are longer than two bytes, joined with `_`. If fewer
/// than two such words exist the label is `"general"`.
fn infer_domain(goal: &str) -> String {
    /// Only the leading words of the goal are scanned for keywords.
    const SCAN_WORDS: usize = 8;

    /// Ordered `(label, keywords)` table; earlier rows take precedence.
    /// Every keyword is a single word because it is matched against single
    /// whitespace-delimited tokens.
    const DOMAIN_KEYWORDS: &[(&str, &[&str])] = &[
        (
            "testing",
            &["test", "tests", "spec", "testing", "assert", "integration"],
        ),
        ("deployment", &["deploy", "release", "publish", "ship"]),
        ("bugfix", &["fix", "bug", "patch", "repair", "debug"]),
        (
            "refactoring",
            &["refactor", "restructure", "reorganize", "rewrite"],
        ),
        (
            "documentation",
            &["doc", "document", "readme", "guide", "explain"],
        ),
        (
            "development",
            &["build", "create", "implement", "add", "make", "new"],
        ),
        (
            "analysis",
            &["analyze", "review", "audit", "inspect", "check"],
        ),
        (
            "configuration",
            &["config", "setup", "install", "configure", "init"],
        ),
    ];

    let lower = goal.to_lowercase();

    // Strip leading/trailing punctuation so "fix:", "(deploy)" or "tests,"
    // compare equal to their bare keyword.
    let tokens: Vec<&str> = lower
        .split_whitespace()
        .take(SCAN_WORDS)
        .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()))
        .collect();

    for (domain, keywords) in DOMAIN_KEYWORDS {
        if tokens.iter().any(|token| keywords.contains(token)) {
            return (*domain).to_string();
        }
    }

    // Fallback: first 2 meaningful words (length measured in bytes).
    let meaningful: Vec<&str> = lower
        .split_whitespace()
        .filter(|word| word.len() > 2)
        .take(2)
        .collect();
    if meaningful.len() >= 2 {
        meaningful.join("_")
    } else {
        "general".to_string()
    }
}
1285
1286/// Handle compaction completion by storing the summary as a Warm memory.
1287///
1288/// Extracts the compaction summary from the event and spawns a background
1289/// task to persist it via MemoryManager. This replaces the inline 30-line
1290/// block that was previously in the event callback.
1291fn handle_compaction(summary: String, session_id: String, memory_manager: Arc<MemoryManager>) {
1292    let entry = MemoryEntry {
1293        id: uuid::Uuid::new_v4().to_string(),
1294        memory_type: MemoryType::Conversation,
1295        tier: crate::memory::MemoryTier::Warm,
1296        content: summary,
1297        content_hash: 0,
1298        source: "compaction".to_string(),
1299        session_id: Some(session_id),
1300        tags: vec![],
1301        importance: 0.5,
1302        pinned: false,
1303        protection: crate::memory::ProtectionLevel::None,
1304        auto_classified: false,
1305        session_appearances: 0,
1306        user_corrected: false,
1307        seen_in_sessions: vec![],
1308        created_at: chrono::Utc::now(),
1309        accessed_at: chrono::Utc::now(),
1310        modified_at: chrono::Utc::now(),
1311        access_count: 0,
1312        decay_score: 1.0,
1313        compaction_level: 0,
1314        compacted_from: vec![],
1315        related_ids: vec![],
1316        contradicts: None,
1317    };
1318    tokio::spawn(async move {
1319        if let Err(e) = memory_manager.remember(entry).await {
1320            tracing::warn!(error = %e, "Failed to save compaction summary");
1321        }
1322    });
1323}
1324
1325/// Build a system prompt from the Seed's goal, constraints, persona,
1326/// and optionally a capability index and kernel manifest.
1327///
1328/// Note: SKILL.md content is no longer injected here. Capabilities are
1329/// surfaced through the CSpace tool set + semantic retrieval instead.
1330fn build_system_prompt(
1331    seed: &Seed,
1332    persona_prompt: Option<&str>,
1333    capabilities_xml: Option<&str>,
1334    kernel_manifest: Option<&str>,
1335) -> String {
1336    let mut prompt = String::from(
1337        "You are an autonomous agent in the Oxios operating system.\n\
1338         You execute Seeds — immutable specifications with goals, constraints, and\n\
1339         acceptance criteria.\n\n\
1340         ## Available Tools\n\
1341         You have the following tools:\n\
1342         - **File tools**: read, write, edit files; grep, find, ls for searching\n\
1343         - **Web tools**: web_search for searching the web, get_search_results for retrieving cached results\n\
1344         - **Exec**: run shell commands\n\
1345         - **Memory tools**: memory_read, memory_write, memory_search — agent's internal recall\n\
1346         - **Knowledge**: knowledge — personal markdown vault for documents and notes\n\
1347         - **Kernel tools**: agent, project, persona, cron, security, budget, resource\n\n\
1348         **Important**: When the task involves fetching information from the internet,\n\
1349         websites, or online services, use `web_search` first — do NOT search local files.\n\
1350         When the task asks to \"get\", \"fetch\", \"find online\", or \"look up\" something\n\
1351         from the web, use `web_search`.\n",
1352    );
1353    prompt.push_str(&format!("\n## Goal\n{}\n", seed.goal));
1354
1355    // Preserve user's original wording so the agent sees exact language,
1356    // filenames, and nuances that may have been abstracted in the goal.
1357    if !seed.original_request.is_empty() && seed.original_request != seed.goal {
1358        prompt.push_str(&format!(
1359            "\n## User's Original Request\n{}\n",
1360            seed.original_request
1361        ));
1362    }
1363
1364    if !seed.constraints.is_empty() {
1365        prompt.push_str("\n## Constraints\n");
1366        for (i, c) in seed.constraints.iter().enumerate() {
1367            prompt.push_str(&format!("{}. {}\n", i + 1, c));
1368        }
1369    }
1370
1371    if !seed.acceptance_criteria.is_empty() {
1372        prompt.push_str("\n## Acceptance Criteria\n");
1373        for (i, c) in seed.acceptance_criteria.iter().enumerate() {
1374            prompt.push_str(&format!("{}. {}\n", i + 1, c));
1375        }
1376    }
1377
1378    if !seed.ontology.is_empty() {
1379        prompt.push_str("\n## Domain Entities\n");
1380        for e in &seed.ontology {
1381            prompt.push_str(&format!(
1382                "- **{}** ({}): {}\n",
1383                e.name, e.entity_type, e.description
1384            ));
1385        }
1386    }
1387
1388    // Inject persona system prompt
1389    if let Some(pp) = persona_prompt {
1390        prompt.push_str("\n## Persona\n");
1391        prompt.push_str(pp);
1392        prompt.push('\n');
1393    }
1394
1395    // Inject semantic capability index (from ToolRetriever)
1396    if let Some(xml) = capabilities_xml {
1397        prompt.push_str("\n## Available Capabilities\n");
1398        prompt.push_str("The following capabilities are relevant to your goal. ");
1399        prompt.push_str("Use the `read` tool to load SKILL.md for any program.\n\n");
1400        prompt.push_str(xml);
1401        prompt.push('\n');
1402    }
1403
1404    // Inject kernel manifest (from CSpace)
1405    if let Some(manifest) = kernel_manifest {
1406        prompt.push('\n');
1407        prompt.push_str(manifest);
1408        prompt.push('\n');
1409    }
1410
1411    // Execution environment guidance
1412    prompt.push_str(
1413        "\n## Execution Protocol\n\
1414         1. UNDERSTAND — Read the Seed completely before acting.\n\
1415         2. PLAN — Determine the minimal set of actions needed.\n\
1416         3. EXECUTE — Use tools to accomplish the goal. Prefer the simplest approach.\n\
1417         4. VERIFY — After each action, check the result: created a file? read it back.\n\
1418         5. REPORT — Summarize how each acceptance criterion was met, with evidence.\n\n\
1419         ## Hard Boundaries\n\
1420         - NEVER modify files outside the workspace scope\n\
1421         - NEVER execute destructive commands without confirming scope\n\
1422         - NEVER claim completion without evidence — show the output, not your opinion\n\
1423         - NEVER add features or improvements beyond the Seed scope\n\
1424         - If you cannot complete the Seed, say so and explain WHY\n\n\
1425         ## Scope Guard\n\
1426         The Seed defines your universe. Do not:\n\
1427         - Refactor code the Seed didn't mention\n\
1428         - Add tests the Seed didn't require\n\
1429         - Change configuration the Seed didn't specify\n\
1430         - \"Improve\" anything beyond what the acceptance criteria demand\n\n\
1431         ## Error Handling\n\
1432         - If a tool fails, read the error message carefully before retrying\n\
1433         - If a command fails, do NOT immediately retry with --force or sudo\n\
1434         - If stuck after 3 attempts, report the blocker rather than continuing to fail\n\n\
1435         ## Shape Matching\n\
1436         Match your output to the task: simple task → concise response.\n\
1437         Do not write 50 lines when 5 would do.\n\
1438         Use `exec` for all command execution (git, gh, osascript, etc.).",
1439    );
1440
1441    prompt
1442}
1443
1444/// Build the user prompt from the seed.
1445fn build_user_prompt(seed: &Seed) -> String {
1446    format!(
1447        "Execute the following goal:\n\n{}\n\nAcceptance criteria:\n{}",
1448        seed.goal,
1449        seed.acceptance_criteria
1450            .iter()
1451            .enumerate()
1452            .map(|(i, c)| format!("{}. {}", i + 1, c))
1453            .collect::<Vec<_>>()
1454            .join("\n")
1455    )
1456}
1457
1458impl std::fmt::Debug for AgentRuntime {
1459    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1460        f.debug_struct("AgentRuntime")
1461            .field("model_id", &self.config.model_id)
1462            .finish()
1463    }
1464}
1465
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use oxi_sdk::{AgentTool, ToolContext, ToolError};
    use oxios_ouroboros::Entity;
    use serde_json::Value;

    /// A no-op tool used only to occupy a name in a `ToolRegistry`.
    struct DummyTool {
        name: String,
    }

    impl DummyTool {
        /// Create a dummy tool registered under `name`.
        fn named(name: &str) -> Self {
            Self { name: name.into() }
        }
    }

    #[async_trait]
    impl AgentTool for DummyTool {
        fn name(&self) -> &str {
            &self.name
        }
        fn label(&self) -> &str {
            &self.name
        }
        fn description(&self) -> &str {
            "Test tool"
        }
        fn parameters_schema(&self) -> Value {
            serde_json::json!({"type": "object"})
        }

        /// Always succeeds with the fixed payload `"ok"`; every argument is ignored.
        async fn execute(
            &self,
            _tool_call_id: &str,
            _params: Value,
            _shutdown: Option<tokio::sync::oneshot::Receiver<()>>,
            _ctx: &ToolContext,
        ) -> Result<oxi_sdk::AgentToolResult, ToolError> {
            Ok(oxi_sdk::AgentToolResult::success("ok"))
        }
    }

    /// Shared fixture: a generation-0 seed carrying only `goal`.
    ///
    /// All list fields are empty and all optional fields are `None`, so each
    /// test sets just the fields it asserts on instead of repeating the full
    /// struct literal.
    fn bare_seed(goal: &str) -> Seed {
        Seed {
            id: uuid::Uuid::new_v4(),
            goal: goal.into(),
            constraints: vec![],
            acceptance_criteria: vec![],
            ontology: vec![],
            created_at: chrono::Utc::now(),
            generation: 0,
            parent_seed_id: None,
            cspace_hint: None,
            original_request: String::new(),
            output_schema: None,
            project_id: None,
        }
    }

    /// `missing` reports nothing when every required tool is registered.
    #[test]
    fn test_requires_tools_validation_passes() {
        let registry = ToolRegistry::new();
        registry.register(DummyTool::named("read"));
        registry.register(DummyTool::named("exec"));

        let missing = registry.missing(&["read", "exec"]);

        assert!(
            missing.is_empty(),
            "Expected no missing tools, got: {:?}",
            missing
        );
    }

    /// `missing` lists exactly the required tools that were never registered.
    #[test]
    fn test_requires_tools_validation_fails() {
        let registry = ToolRegistry::new();
        registry.register(DummyTool::named("read"));

        let missing = registry.missing(&["read", "exec", "nonexistent"]);

        assert_eq!(missing, vec!["exec", "nonexistent"]);
    }

    /// The system prompt surfaces the goal, constraints, acceptance criteria
    /// and ontology entries of the seed.
    #[test]
    fn test_build_system_prompt_includes_goal() {
        let mut seed = bare_seed("Build a web server");
        seed.constraints = vec!["Must use Rust".into()];
        seed.acceptance_criteria = vec!["Server responds to requests".into()];
        seed.ontology = vec![Entity {
            name: "HttpServer".into(),
            entity_type: "struct".into(),
            description: "The main server struct".into(),
        }];

        let prompt = build_system_prompt(&seed, None, None, None);

        assert!(prompt.contains("Build a web server"));
        assert!(prompt.contains("Must use Rust"));
        assert!(prompt.contains("Server responds to requests"));
        assert!(prompt.contains("HttpServer"));
        assert!(prompt.contains("struct"));
    }

    /// A seed with no constraints, criteria or ontology still yields a prompt
    /// containing the goal.
    #[test]
    fn test_build_system_prompt_empty() {
        let seed = bare_seed("Test goal");

        let prompt = build_system_prompt(&seed, None, None, None);

        assert!(prompt.contains("Test goal"));
    }

    /// The user prompt states the goal and numbers the criteria from 1.
    #[test]
    fn test_build_user_prompt_numbers_criteria() {
        let mut seed = bare_seed("Ship it");
        seed.acceptance_criteria = vec!["Compiles".into(), "Tests pass".into()];

        assert_eq!(
            build_user_prompt(&seed),
            "Execute the following goal:\n\nShip it\n\nAcceptance criteria:\n1. Compiles\n2. Tests pass"
        );
    }

    #[test]
    fn test_infer_domain_testing() {
        assert_eq!(infer_domain("run all unit tests for the kernel"), "testing");
    }

    #[test]
    fn test_infer_domain_deployment() {
        assert_eq!(
            infer_domain("deploy the web service to production"),
            "deployment"
        );
    }

    #[test]
    fn test_infer_domain_bugfix() {
        assert_eq!(infer_domain("fix the null pointer error in main"), "bugfix");
    }

    #[test]
    fn test_infer_domain_development() {
        assert_eq!(
            infer_domain("create a new REST API endpoint"),
            "development"
        );
    }

    #[test]
    fn test_infer_domain_analysis() {
        assert_eq!(
            infer_domain("review the code for security issues"),
            "analysis"
        );
    }

    /// A goal matching none of the keyword domains above must still produce a
    /// domain label.
    #[test]
    fn test_infer_domain_fallback() {
        let domain = infer_domain("optimize performance metrics");
        // NOTE(review): the fallback is expected to be derived from the first
        // two meaningful words of the goal; the exact label is not pinned
        // here, only that it is non-empty. Confirm against `infer_domain`
        // before tightening this assertion.
        assert!(!domain.is_empty());
    }
}
oxios_kernel/agent_runtime.rs

oxios_kernel/
agent_runtime.rs