oxios_kernel/
agent_runtime.rs

1//! Agent runtime: wraps oxi-sdk's Agent for Seed execution.
2//!
3//! The AgentRuntime uses `OxiosEngine.oxi().agent()` (AgentBuilder pattern)
4//! to construct agents with full middleware, observability, and security
5//! integration from oxi-sdk 0.24.0.
6//!
7//! # Architecture
8//!
9//! All tool access goes through `KernelHandle` — the single syscall-table-like
10//! path for agent OS control. The runtime:
11//!
12//! 1. Resolves the agent's CSpace from persona/role/hint
//! 2. Registers tools via `register_tools_from_cspace_gated()`
14//! 3. Optionally queries `ToolRetriever` for semantic capability hints
15//! 4. Builds an `Agent` via `AgentBuilder` with middleware pipeline
16//! 5. Runs via `Agent::run_streaming()` for real-time event processing
17//!
18//! # oxi-sdk 0.23.0 Integration
19//!
20//! Uses `AgentBuilder` for agent construction with:
21//! - `.with_rate_limit()` — tool call rate limiting
22//! - `.with_token_budget()` — per-execution token caps
//! - `.tracer()` / `.cost_tracker()` — observability hooks
//!
//! ## Routing integration (RFC-011)
25//!
26//! Model usage events (`AgentEvent::Usage`) are recorded to the shared
27//! `RoutingStats` so the Web dashboard can display per-model call counts
28//! and estimated costs.
29
30use anyhow::Result;
31use oxi_sdk::observability::AuditTrail;
32use oxi_sdk::{
33    Agent, AgentConfig, AgentEvent, CompactionEvent, CompactionStrategy, ProviderResolver,
34};
35use oxi_sdk::{SearchCache, ToolExecutionMode, ToolRegistry};
36use parking_lot::Mutex;
37use std::collections::HashMap;
38use std::sync::Arc;
39// RFC-014 Phase D: `ToolRegistry::register_arc` is used in the AgentBuilder
40// path to attach CSpace tools after `builder.build()` returns.
41
42use crate::access_manager::{AccessGate, AgentContext, TracingAuditSink, TrailAuditSink};
43use crate::capability::resolve::resolve_cspace;
44use crate::engine::OxiosEngine;
45use crate::memory::{MemoryEntry, MemoryManager, MemoryType};
46use crate::persona::PersonaManager;
47use crate::tools::registration::register_tools_from_cspace_gated;
48
49use crate::KernelHandle;
50use crate::event_bus::KernelEvent;
51use crate::session_context::SessionContext;
52use crate::types::AgentId;
53use oxios_ouroboros::{ExecutionResult, Seed};
54
55/// Global LLM circuit breaker instance — delegates to oxi-sdk's ProviderCircuitBreaker.
56static LLM_CIRCUIT_BREAKER: std::sync::OnceLock<oxi_sdk::ProviderCircuitBreaker> =
57    std::sync::OnceLock::new();
58
59/// Get the global LLM circuit breaker.
60fn get_llm_circuit_breaker() -> &'static oxi_sdk::ProviderCircuitBreaker {
61    LLM_CIRCUIT_BREAKER.get_or_init(|| {
62        oxi_sdk::ProviderCircuitBreaker::new(
63            "global".to_string(),
64            oxi_sdk::CircuitBreakerConfig::default(),
65        )
66    })
67}
68
69/// Configuration for creating AgentRuntime instances.
70#[derive(Debug, Clone)]
71pub struct AgentRuntimeConfig {
72    /// Model ID in `provider/model` format (e.g. `anthropic/claude-sonnet-4-20250514`).
73    pub model_id: String,
74    /// How to execute tool calls within a single turn.
75    pub tool_execution: ToolExecutionMode,
76    /// Whether auto-retry is enabled for retryable LLM errors.
77    pub auto_retry_enabled: bool,
78    /// Bound project paths. AgentRuntime sets CWD to paths[0].
79    pub project_paths: Vec<std::path::PathBuf>,
80    /// Scratch workspace directory for temp files.
81    pub workspace_dir: Option<std::path::PathBuf>,
82    /// API key resolved from CredentialStore at build time.
83    pub api_key: Option<String>,
84    /// Per-provider options for fine-grained control.
85    pub provider_options: Option<oxi_sdk::ProviderOptions>,
86    /// Rate limit for tool calls (requests per minute). 0 = unlimited.
87    pub rate_limit_per_minute: usize,
88    /// Token budget per agent execution. 0 = unlimited.
89    pub token_budget: usize,
90    /// Enable audit logging for all tool executions.
91    pub audit_tool_calls: bool,
92    /// Provider-level RPM for rate-limited provider pool. 0 = no pooling.
93    /// When set, uses `OxiosEngine::pooled_provider()` instead of `create_provider()`.
94    pub provider_rpm: u32,
95}
96
97impl Default for AgentRuntimeConfig {
98    fn default() -> Self {
99        Self {
100            model_id: String::new(),
101            tool_execution: ToolExecutionMode::Parallel,
102            auto_retry_enabled: true,
103            project_paths: Vec::new(),
104            workspace_dir: None,
105            api_key: None,
106            provider_options: None,
107            rate_limit_per_minute: 0,
108            token_budget: 0,
109            audit_tool_calls: false,
110            provider_rpm: 0,
111        }
112    }
113}
114
115/// Mutable state shared between the event callback and the main execute flow.
116#[derive(Default)]
117struct ExecuteState {
118    final_content: String,
119    steps_completed: usize,
120    success: bool,
121    /// Collected trajectory steps for SONA learning (RFC-020 Phase 2).
122    /// Ordered by insertion — parallel tools get their final position
123    /// resolved when they complete, preserving approximate execution order.
124    trajectory_steps: Vec<oxios_memory::memory::sona::TrajectoryStep>,
125    /// Map of tool_call_id → (start instant, index into trajectory_steps).
126    /// Used to correlate ToolExecutionEnd with the correct step when
127    /// parallel tool calls complete out of order.
128    pending_tools: std::collections::HashMap<String, (std::time::Instant, usize)>,
129    /// Ordered tool_call_ids matching trajectory_steps indices.
130    /// Pushed in ToolExecutionStart, same order as trajectory_steps.
131    tool_call_ids: Vec<String>,
132    /// Per-step tool args (JSON string) captured from ToolExecutionStart.
133    tool_args_map: std::collections::HashMap<String, String>,
134    /// Per-step error flag from ToolExecutionEnd.
135    tool_error_map: std::collections::HashMap<String, bool>,
136    /// Per-step start timestamp (UTC) from ToolExecutionStart.
137    tool_timestamps: std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
138    /// Cumulative input tokens from AgentEvent::Usage.
139    total_input_tokens: u64,
140    /// Cumulative output tokens from AgentEvent::Usage.
141    total_output_tokens: u64,
142}
143
/// Runtime that wraps an oxi-sdk `Agent` for executing Seeds.
///
/// Each call to [`AgentRuntime::execute`] creates a fresh `Agent`,
/// builds a ToolRegistry based on the agent's CSpace, and runs it to completion.
///
/// All OS-level access goes through `KernelHandle` — the single syscall table
/// for agent control. Provider/model resolution goes through `EngineHandle`,
/// which returns the latest `OxiosEngine` (hot-swapped on config change).
///
/// Optional collaborators (persona manager, tool retriever, persistence
/// hook) are attached with the `with_*` builder methods; when absent, the
/// corresponding step of an execution is skipped.
pub struct AgentRuntime {
    /// Handle queried at the start of every execution for the current
    /// engine and its default model id.
    engine_handle: Arc<crate::engine::EngineHandle>,
    /// Base runtime configuration. Each execution works on a clone whose
    /// `model_id` is replaced by the engine's current default model.
    config: AgentRuntimeConfig,
    /// Single path to all kernel services.
    kernel_handle: Arc<KernelHandle>,
    /// Persona manager for system prompt injection. Also supplies the
    /// active persona's role for CSpace resolution.
    persona_manager: Option<Arc<PersonaManager>>,
    /// Semantic tool retriever for capability discovery.
    tool_retriever: Option<Arc<crate::tools::retrieval::ToolRetriever>>,
    /// Shared routing stats (shared with EngineApi). Passed through to
    /// `run_agent` on every execution.
    routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
    /// Autonomous persistence hook (RFC-016). Evaluated in a spawned task
    /// after a successful execution.
    persistence_hook: Option<Arc<crate::persistence_hook::PersistenceHook>>,
    /// Per-session assistant message index counter (RFC-016), keyed by
    /// session id (empty string when no session id was given).
    session_msg_counter: Arc<Mutex<HashMap<String, usize>>>,
}
168
169impl AgentRuntime {
170    /// Creates a new agent runtime with engine handle and kernel access.
171    ///
172    /// The active model is resolved live from `engine_handle` on each
173    /// `execute()` (reads the post-hot-swap default) — there is no frozen
174    /// model id at construction. Tool access goes through `kernel_handle`.
175    pub fn new(
176        engine_handle: Arc<crate::engine::EngineHandle>,
177        kernel_handle: Arc<KernelHandle>,
178        routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
179    ) -> Self {
180        Self {
181            engine_handle,
182            config: AgentRuntimeConfig::default(),
183            kernel_handle,
184            persona_manager: None,
185            tool_retriever: None,
186            routing_stats,
187            persistence_hook: None,
188            session_msg_counter: Arc::new(Mutex::new(HashMap::new())),
189        }
190    }
191
192    /// Attach a PersonaManager for persona system prompt injection.
193    pub fn with_persona_manager(mut self, pm: Arc<PersonaManager>) -> Self {
194        self.persona_manager = Some(pm);
195        self
196    }
197
198    /// Set the runtime config (overrides defaults).
199    pub fn with_config(mut self, config: AgentRuntimeConfig) -> Self {
200        self.config = config;
201        self
202    }
203
204    /// Attach a ToolRetriever for semantic capability discovery.
205    pub fn with_tool_retriever(
206        mut self,
207        retriever: Arc<crate::tools::retrieval::ToolRetriever>,
208    ) -> Self {
209        self.tool_retriever = Some(retriever);
210        self
211    }
212
213    /// Attach a PersistenceHook for autonomous persistence (RFC-016).
214    pub fn with_persistence_hook(
215        mut self,
216        hook: Arc<crate::persistence_hook::PersistenceHook>,
217    ) -> Self {
218        self.persistence_hook = Some(hook);
219        self
220    }
221
222    /// Execute a Seed by running the tool-calling agent to completion.
223    ///
224    /// 1. Resolves CSpace from persona/role/hint
225    /// 2. Registers tools via CSpace
226    /// 3. Recalls memories if available
227    /// 4. Creates Agent via `Agent::new_with_resolver()`
228    /// 5. Runs via `Agent::run_streaming()`
229    pub async fn execute(
230        &self,
231        agent_id: AgentId,
232        seed: &Seed,
233        session_ctx: &mut SessionContext,
234    ) -> Result<ExecutionResult> {
235        // RFC-015: session_id is derived from seed.id for chat transparency
236        // event publishing. Most callers run one Seed per session turn, so
237        // seed.id is a usable session identifier.
238        let session_id: Option<String> = Some(seed.id.to_string());
239        self.execute_with_session(agent_id, seed, session_ctx, session_id)
240            .await
241    }
242
243    /// Like [`execute`](Self::execute) but with an explicit session_id for
244    /// RFC-015 chat transparency event publishing.
245    pub async fn execute_with_session(
246        &self,
247        agent_id: AgentId,
248        seed: &Seed,
249        session_ctx: &mut SessionContext,
250        session_id: Option<String>,
251    ) -> Result<ExecutionResult> {
252        let prompt = build_user_prompt(seed);
253
254        // Get active persona system prompt.
255        let persona_prompt = self
256            .persona_manager
257            .as_ref()
258            .map(|pm| pm.active_system_prompt())
259            .filter(|s| !s.trim().is_empty());
260
261        // Determine persona role for CSpace resolution.
262        let persona_role = self
263            .persona_manager
264            .as_ref()
265            .and_then(|pm| pm.get_active_persona().map(|p| p.role.clone()));
266
267        // Resolve CSpace from persona role, seed hint, or default.
268        let cspace = resolve_cspace(
269            seed.cspace_hint.as_deref(),
270            persona_role.as_deref(),
271            Some("worker"),
272            agent_id,
273        );
274
275        // Build system prompt (without SKILL.md injection — capabilities are
276        // surfaced through the CSpace tool set + semantic retrieval instead).
277        let mut system_prompt = build_system_prompt(
278            seed,
279            persona_prompt.as_deref(),
280            None,
281            None,
282            seed.workspace_context.as_deref(),
283        );
284
285        // Semantic capability retrieval: find tools relevant to this seed's goal.
286        let capabilities_xml = if let Some(ref retriever) = self.tool_retriever {
287            match retriever.embedder().embed(&seed.goal).await {
288                Ok(query_vec) => {
289                    let results = retriever.retrieve(&query_vec, 8);
290                    if results.is_empty() {
291                        None
292                    } else {
293                        let xml = crate::tools::retrieval::format_capability_index(&results);
294                        tracing::info!(count = results.len(), "Retrieved relevant capabilities");
295                        Some(xml)
296                    }
297                }
298                Err(e) => {
299                    tracing::warn!(error = %e, "Failed to embed seed goal for retrieval");
300                    None
301                }
302            }
303        } else {
304            None
305        };
306
307        // Build kernel manifest from CSpace active domains.
308        let kernel_manifest = {
309            let domains = cspace.active_domains();
310            if domains.is_empty() {
311                None
312            } else {
313                Some(crate::tools::retrieval::build_kernel_manifest(&domains))
314            }
315        };
316
317        // Rebuild system prompt with capabilities and manifest if available.
318        if capabilities_xml.is_some() || kernel_manifest.is_some() {
319            system_prompt = build_system_prompt(
320                seed,
321                persona_prompt.as_deref(),
322                capabilities_xml.as_deref(),
323                kernel_manifest.as_deref(),
324                seed.workspace_context.as_deref(),
325            );
326        }
327
328        // Blend relevant memories into system prompt.
329        let memory_manager = self.kernel_handle.agents.memory_manager();
330        match memory_manager
331            .recall_with_proactive(&seed.goal, &mut session_ctx.recall_timing)
332            .await
333        {
334            Ok(memories) if !memories.is_empty() => {
335                tracing::info!(count = memories.len(), "Recalled memories for seed");
336                system_prompt = memory_manager.blend_into_prompt(&memories, &system_prompt);
337            }
338            Ok(_) => tracing::debug!("No memories recalled"),
339            Err(e) => tracing::warn!(error = %e, "Failed to recall memories"),
340        }
341
342        // Inject learned strategy from SONA (RFC-020 Phase 2).
343        if let Some(sona) = memory_manager.sona_engine() {
344            match sona.adapt(&seed.goal).await {
345                Ok(Some(pattern)) if pattern.confidence > 0.5 => {
346                    tracing::info!(
347                        domain = %pattern.domain,
348                        confidence = pattern.confidence,
349                        "SONA learned pattern injected"
350                    );
351                    system_prompt.push_str(&format!(
352                        "\n\n## Learned Strategy (confidence: {:.0}%)\n{}\n",
353                        pattern.confidence * 100.0,
354                        pattern.strategy,
355                    ));
356                }
357                Ok(_) => tracing::debug!("No high-confidence SONA pattern found"),
358                Err(e) => tracing::debug!(error = %e, "SONA adapt failed (non-fatal)"),
359            }
360        }
361
362        // Blend relevant knowledge notes into system prompt (KnowledgeLens, RFC-003 Phase 3).
363        match self
364            .kernel_handle
365            .knowledge_lens
366            .recall_for_context(&seed.goal, 5)
367            .await
368        {
369            Ok(ctx) if !ctx.notes.is_empty() => {
370                tracing::info!(
371                    notes = ctx.notes.len(),
372                    memories = ctx.memories.len(),
373                    "Recalled knowledge context for seed"
374                );
375                let knowledge_blend = ctx
376                    .notes
377                    .iter()
378                    .take(3)
379                    .map(|n| format!("## {}\n\n{}", n.name, n.content))
380                    .collect::<Vec<_>>()
381                    .join("\n\n");
382                system_prompt.push_str("\n\n## Relevant Knowledge\n\n");
383                system_prompt.push_str(&knowledge_blend);
384            }
385            Ok(_) => tracing::debug!("No knowledge recalled"),
386            Err(e) => tracing::warn!(error = %e, "Failed to recall knowledge context"),
387        }
388
389        // Resolve the LIVE default model (post-hot-swap). This is the single
390        // source of truth — the same engine default the OuroborosEngine reads
391        // via the ModelResolver port. Validates fail-fast: a bad model ID set
392        // via the Web UI is rejected here at execute entry, before any tool work.
393        let engine = self.engine_handle.get();
394        let model_id = engine.default_model_id().to_string();
395        engine.resolve_model(&model_id)?;
396        let seed_id = seed.id;
397
398        // Build the agent. Refresh config.model_id to the live value so every
399        // downstream consumer (AgentConfig, legacy provider path, usage callback)
400        // uses the same model as the interview/seed phases — no frozen boot
401        // string that silently diverges from what interview used.
402        let mut config = self.config.clone();
403        config.model_id = model_id;
404        let kernel_handle = Arc::clone(&self.kernel_handle);
405
406        // Extract audit trail from kernel for TrailAuditSink wiring.
407        let audit_trail: Option<Arc<AuditTrail>> =
408            Some(Arc::clone(&self.kernel_handle.security.audit_trail));
409
410        let (
411            mut final_content,
412            steps_completed,
413            success,
414            trajectory_steps,
415            agent,
416            tool_call_ids,
417            tool_args_map,
418            tool_error_map,
419            tool_timestamps,
420            total_input_tokens,
421            total_output_tokens,
422        ) = {
423            run_agent(
424                &config,
425                &engine,
426                kernel_handle,
427                system_prompt,
428                prompt,
429                seed_id,
430                seed.goal.clone(),
431                agent_id,
432                cspace,
433                audit_trail,
434                self.routing_stats.clone(),
435                session_id.clone(),
436                &seed.mount_paths,
437            )
438            .await?
439        };
440
441        // ── Post-execution: safety net for empty final content ──
442        //
443        // oxi 0.32.0 removed max_iterations — the loop now exits naturally
444        // when the LLM produces a text-only response (pi-agent behavior).
445        // This block is kept as a safety net in case the LLM returns empty
446        // text despite a natural exit (rare, but possible).
447        if final_content.is_empty() && !trajectory_steps.is_empty() {
448            let tool_summary: Vec<String> = trajectory_steps
449                .iter()
450                .enumerate()
451                .map(|(i, step)| {
452                    let truncated = if step.output.len() > 800 {
453                        // Char-boundary safe truncation: roll back to the
454                        // nearest UTF-8 boundary so multibyte sequences
455                        // (Korean, CJK, emoji) don't panic on byte slicing.
456                        let mut end = 800;
457                        while end > 0 && !step.output.is_char_boundary(end) {
458                            end -= 1;
459                        }
460                        format!("{}...", &step.output[..end])
461                    } else {
462                        step.output.clone()
463                    };
464                    format!("{}. [{}] {}", i + 1, step.input, truncated)
465                })
466                .collect();
467            let summary_prompt = format!(
468                "도구 실행 결과:\n\n{}\n\n\
469                 위 결과를 바탕으로 사용자의 요청에 대해 자연스럽게 한국어로 답변해주세요. \
470                 도구의 원시 출력을 그대로 복사하지 말고, 의미 있는 내용만 정리해서 전달하세요.",
471                tool_summary.join("\n")
472            );
473            match agent.run(summary_prompt).await {
474                Ok((response, _events)) => {
475                    if !response.content.is_empty() {
476                        tracing::info!(seed_id = %seed_id, "Post-execution summary generated");
477                        final_content = response.content;
478                    }
479                }
480                Err(e) => {
481                    tracing::warn!(error = %e, "Post-execution summary failed");
482                }
483            }
484        }
485
486        // Map trajectory steps to tool call records for the execution result.
487        // tool_call_ids[i] corresponds to trajectory_steps[i].
488        let tool_calls: Vec<oxios_ouroboros::ToolCallRecord> = trajectory_steps
489            .iter()
490            .enumerate()
491            .map(|(i, step)| {
492                let tc_id = tool_call_ids.get(i).cloned().unwrap_or_default();
493                let args_str = tool_call_ids
494                    .get(i)
495                    .and_then(|id| tool_args_map.get(id))
496                    .cloned()
497                    .unwrap_or_default();
498                let is_error = tool_call_ids
499                    .get(i)
500                    .and_then(|id| tool_error_map.get(id))
501                    .copied()
502                    .unwrap_or(false);
503                let timestamp = tool_call_ids
504                    .get(i)
505                    .and_then(|id| tool_timestamps.get(id))
506                    .copied();
507                let input_str = truncate_json_str(&args_str, 500);
508                oxios_ouroboros::ToolCallRecord {
509                    tool: step.input.clone(),
510                    input: input_str,
511                    output: step.output.clone(),
512                    duration_ms: step.duration_ms,
513                    is_error,
514                    tool_call_id: tc_id,
515                    timestamp,
516                }
517            })
518            .collect();
519
520        tracing::info!(
521            seed_id = %seed_id,
522            steps = steps_completed,
523            success,
524            tool_calls = tool_calls.len(),
525            "AgentRuntime finished"
526        );
527
528        let result = ExecutionResult {
529            output: final_content.clone(),
530            steps_completed,
531            success,
532            tool_calls,
533            tokens_input: total_input_tokens,
534            tokens_output: total_output_tokens,
535            model_id: self.engine_handle.get().default_model_id().to_string(),
536        };
537
538        // RFC-016: Autonomous persistence hook.
539        // Runs after successful execution, fire-and-forget.
540        if success && let Some(hook) = &self.persistence_hook {
541            let already_saved_knowledge = trajectory_steps
542                .iter()
543                .any(|s| s.input == "knowledge" && s.output.contains("written successfully"));
544            let hook = hook.clone();
545            let seed_clone = seed.clone();
546            let traj_clone = trajectory_steps.clone();
547            let output_clone = final_content.clone();
548            let sid = session_id.clone();
549            // Compute the assistant message index for this execution.
550            // Increment per-session counter, then use the pre-increment value.
551            let msg_index = {
552                let mut counter = self.session_msg_counter.lock();
553                let idx = counter.entry(sid.clone().unwrap_or_default()).or_insert(0);
554                let current = *idx;
555                *idx += 1;
556                current
557            };
558            tokio::spawn(async move {
559                match hook
560                    .evaluate(
561                        &seed_clone,
562                        &traj_clone,
563                        &output_clone,
564                        already_saved_knowledge,
565                    )
566                    .await
567                {
568                    Ok(plan) => {
569                        if !plan.memory.is_empty() || !plan.knowledge.is_empty() {
570                            tracing::info!(
571                                memory = plan.memory.len(),
572                                knowledge = plan.knowledge.len(),
573                                message_index = msg_index,
574                                "PersistenceHook executing plan"
575                            );
576                            let session_id = sid.unwrap_or_default();
577                            hook.execute_plan(plan, &session_id, msg_index).await;
578                        }
579                    }
580                    Err(e) => tracing::warn!(error = %e, "PersistenceHook evaluate failed"),
581                }
582            });
583        }
584
585        Ok(result)
586    }
587}
588
589/// Create and run an oxi-sdk `Agent` with CSpace-based tool registration.
590///
591/// Uses `engine.oxi().agent()` (AgentBuilder) for full middleware,
592/// observability, and security integration from oxi-sdk 0.23.0.
593#[allow(clippy::too_many_arguments)]
594async fn run_agent(
595    config: &AgentRuntimeConfig,
596    engine: &OxiosEngine,
597    kernel_handle: Arc<KernelHandle>,
598    system_prompt: String,
599    prompt: String,
600    seed_id: uuid::Uuid,
601    seed_goal: String,
602    agent_id: AgentId,
603    cspace: crate::capability::CSpace,
604    audit_trail: Option<Arc<AuditTrail>>,
605    routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
606    session_id: Option<String>,
607    mount_paths: &[std::path::PathBuf],
608) -> Result<(
609    String,
610    usize,
611    bool,
612    Vec<oxios_memory::memory::sona::TrajectoryStep>,
613    Arc<Agent>,
614    Vec<String>,
615    std::collections::HashMap<String, String>,
616    std::collections::HashMap<String, bool>,
617    std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
618    u64,
619    u64,
620)> {
621    // Extract workspace.
622    // RFC-025: prefer the primary Mount's first path, then fall back to the
623    // legacy config.project_paths, then workspace_dir, then temp.
624    let workspace = if !mount_paths.is_empty() {
625        mount_paths[0].clone()
626    } else if !config.project_paths.is_empty() {
627        config.project_paths[0].clone()
628    } else if let Some(ref ws) = config.workspace_dir {
629        ws.clone()
630    } else {
631        std::env::temp_dir()
632            .join("oxios-agent-workspace")
633            .join(agent_id.to_string())
634    };
635
636    // Ensure workspace exists.
637    let _ = std::fs::create_dir_all(&workspace);
638
639    tracing::debug!(workspace = %workspace.display(), "Agent workspace scoped");
640
641    // Ensure all paths the agent might access are in allowed_paths.
642    //
643    // AgentLifecycleManager::ensure_permissions() adds kernel.workspace (~/.oxios/workspace),
644    // but the agent operates in different directories depending on context:
645    //
646    //   1. Process CWD — oxi-sdk 0.35+ bakes `workspace_dir` into file tools
647    //      via `with_cwd`, so ReadTool/LsTool resolve relatives against the
648    //      workspace, NOT the process CWD. However, oxios's own CSpace tools
649    //      (kernel-bridge tools wrapped in GatedTool) and bash/exec
650    //      subprocesses may still resolve against the process CWD. We grant
651    //      it as a safety net so those tools aren't denied by GatedTool.
652    //   2. The designated workspace — computed from mount_paths / workspace_dir / temp.
653    //   3. Kernel workspace — state store path for seeds, sessions, etc.
654    //   4. /tmp -- general temp file access.
655    //
656    // All four must be in allowed_paths before GatedTool wraps any tool.
657    {
658        use crate::access_manager::{Role, Subject};
659        let agent_name = format!("agent-{agent_id}");
660        let mut am = kernel_handle.exec.access_manager().lock();
661        let perms = am.get_or_create_permissions(&agent_name);
662
663        // 1. CWD -- critical: oxi-sdk resolves relative paths here
664        if let Ok(cwd) = std::env::current_dir() {
665            let cwd_pattern = format!("{}/**", cwd.to_string_lossy().trim_end_matches('/'));
666            if !perms.allowed_paths.iter().any(|p| p == &cwd_pattern) {
667                perms.allow_path(&cwd_pattern);
668                tracing::debug!(
669                    agent = %agent_name,
670                    path = %cwd_pattern,
671                    "Added CWD to agent allowed paths"
672                );
673            }
674        }
675
676        // 2. Designated workspace
677        let ws_pattern = format!("{}/**", workspace.to_string_lossy().trim_end_matches('/'));
678        if !perms.allowed_paths.iter().any(|p| p == &ws_pattern) {
679            perms.allow_path(&ws_pattern);
680        }
681
682        // 2b. RFC-025: every bound Mount grants path access.
683        //     This fixes the latent gap where only project_paths[0] was
684        //     accessible — now all Mount paths (multi-path work) are allowed.
685        //     Parent patterns already covering a path are skipped.
686        for mount_path in mount_paths {
687            let pattern = format!("{}/**", mount_path.to_string_lossy().trim_end_matches('/'));
688            if !perms.allowed_paths.iter().any(|p| p == &pattern) {
689                perms.allow_path(&pattern);
690                tracing::debug!(
691                    agent = %agent_name,
692                    path = %pattern,
693                    "Added Mount path to agent allowed paths (RFC-025)"
694                );
695            }
696        }
697
698        // 3. Kernel workspace (state store path)
699        let kernel_ws = kernel_handle
700            .state
701            .workspace_path()
702            .to_string_lossy()
703            .to_string();
704        let kernel_ws_pattern = format!("{}/**", kernel_ws.trim_end_matches('/'));
705        if kernel_ws_pattern != ws_pattern
706            && !perms.allowed_paths.iter().any(|p| p == &kernel_ws_pattern)
707        {
708            perms.allow_path(&kernel_ws_pattern);
709        }
710
711        // 4. /tmp -- for general temp file access
712        if !perms.allowed_paths.iter().any(|p| p == "/tmp/**") {
713            perms.allow_path("/tmp/**");
714        }
715
716        // Ensure RBAC Superuser role so AccessGate Layer 1 passes.
717        let rbac_subject = Subject::Agent(agent_id);
718        am.rbac_manager_mut()
719            .assign_role(rbac_subject, Role::Superuser);
720    }
721
722    // Start distributed trace span for this agent execution.
723    let _trace_guard = crate::observability::tracer().start(
724        format!("seed-{}", &seed_id.to_string()[..8]).as_str(),
725        oxi_sdk::SpanKind::Agent,
726    );
727
728    // ── Register tools based on CSpace (with access gate) ──
729    let registry = ToolRegistry::new();
730    let search_cache = Arc::new(SearchCache::new());
731
732    // Build agent context for security
733    let agent_context = AgentContext {
734        agent_id,
735        agent_name: format!("agent-{agent_id}"),
736        cspace: Arc::new(cspace.clone()),
737    };
738
739    // Build audit sink: TrailAuditSink (Merkle chain + JSONL) when audit_trail
740    // is available, otherwise fall back to TracingAuditSink.
741    let audit_sink: Arc<dyn crate::access_manager::AuditSink> = if let Some(trail) = audit_trail {
742        let audit_path = kernel_handle
743            .state
744            .workspace_path()
745            .join("audit")
746            .join("access.jsonl");
747        Arc::new(TrailAuditSink::new(trail, audit_path))
748    } else {
749        Arc::new(TracingAuditSink)
750    };
751
752    // Build access gate from kernel's security infrastructure
753    let access_gate = Arc::new(AccessGate::new(
754        kernel_handle.exec.access_manager().clone(),
755        Arc::new(kernel_handle.exec.config_snapshot()),
756        audit_sink,
757    ));
758
759    register_tools_from_cspace_gated(
760        &registry,
761        &kernel_handle,
762        &cspace,
763        search_cache,
764        agent_id,
765        access_gate,
766        agent_context,
767    );
768
769    tracing::info!(
770        seed_id = %seed_id,
771        capabilities = cspace.len(),
772        "Tools registered from CSpace"
773    );
774
775    // ── Build AgentConfig ──
776    //
777    // RFC-014 Phase D: `system_prompt` is also passed to the new
778    // `AgentBuilder::system_prompt()` (which overrides the value embedded
779    // in `AgentConfig` at build time). We clone here so the builder path
780    // can consume the value while the legacy `Agent::new_with_resolver`
781    // path still sees it in the config.
782    let agent_config = AgentConfig {
783        name: format!("agent-{agent_id}"),
784        description: None,
785        model_id: config.model_id.clone(),
786        system_prompt: Some(system_prompt.clone()),
787        timeout_seconds: 300,
788        temperature: Some(0.7),
789        max_tokens: Some(8192),
790        compaction_strategy: CompactionStrategy::Threshold(0.8),
791        compaction_instruction: None,
792        context_window: 128_000,
793        api_key: config.api_key.clone(),
794        workspace_dir: Some(workspace.clone()),
795        output_mode: None,
796        provider_options: config.provider_options.clone(),
797        // oxi-sdk 0.37.0+: ownership identity for oxi's built-in ownership-gated
798        // tools (e.g. the `issue` tool's flock). `None` preserves the pre-0.37.1
799        // behavior (ToolContext.session_id == None). Oxios runs its own tool
800        // set, so no ownership identity is needed here; set `Some(...)` only if
801        // oxios agents start using oxi ownership-gated tools.
802        session_id: None,
803    };
804
805    // ── Build Agent (RFC-014 Phase D) ──
806    //
807    // Two paths:
808    //   1. `provider_rpm == 0` (common): use oxi-sdk 0.26.2's new
809    //      `AgentBuilder` API. The builder unifies model resolution, provider
810    //      creation, and (optionally) middleware wiring. Engine-level
811    //      `authorizer` / `tracer` / `cost_tracker` are propagated through
812    //      the new builder methods.
813    //   2. `provider_rpm > 0` (rare): keep the legacy
814    //      `Agent::new_with_resolver` + `set_hooks` path because the
815    //      AgentBuilder does not expose a way to inject a pre-built
816    //      `ProviderPool` for rate-limited access. This is a deliberate
817    //      scope-limit per RFC-014/phase-d-agentbuilder.md §2 "Provider
818    //      선택 로직은 보존".
819    let agent = if config.provider_rpm > 0 {
820        // ── Legacy path: rate-limited provider pool ──
821        let resolver: Arc<dyn ProviderResolver> = Arc::new(engine.oxi().clone());
822        let provider_name = engine.resolve_model(&config.model_id)?.provider;
823        let provider = engine.pooled_provider(&provider_name, config.provider_rpm)?;
824
825        // Build middleware pipeline.
826        let mut pipeline = oxi_sdk::MiddlewarePipeline::new();
827        if config.rate_limit_per_minute > 0 {
828            pipeline = pipeline.push(oxi_sdk::middleware::builtins::RateLimitMiddleware::new(
829                config.rate_limit_per_minute,
830            ));
831        }
832        if config.token_budget > 0 {
833            pipeline = pipeline.push(oxi_sdk::middleware::builtins::TokenBudgetMiddleware::new(
834                config.token_budget,
835            ));
836        }
837        if config.audit_tool_calls {
838            pipeline = pipeline.push(oxi_sdk::middleware::builtins::LoggingMiddleware::new(
839                tracing::Level::INFO,
840            ));
841        }
842
843        // Create Agent with CSpace tool registry and provider resolver.
844        let agent = Arc::new(Agent::new_with_resolver(
845            provider,
846            agent_config,
847            Arc::new(registry),
848            resolver,
849        ));
850
851        // Wire middleware pipeline → AgentHooks.
852        if !pipeline.is_empty() {
853            let terminate_flag = Arc::new(std::sync::atomic::AtomicBool::new(false));
854            let agent_id_for_hooks = agent_id.to_string();
855            let hooks = oxi_sdk::middleware::build_hooks(
856                Arc::new(pipeline),
857                agent_id_for_hooks,
858                terminate_flag,
859            );
860            agent.set_hooks(hooks);
861        }
862
863        agent
864    } else {
865        // ── New path: AgentBuilder (RFC-014 Phase D) ──
866        let mut builder = engine
867            .oxi()
868            .agent(agent_config)
869            .workspace(&workspace)
870            .system_prompt(system_prompt);
871
872        // CSpace-based tool registration is oxios-specific and is preserved.
873        //
874        // The builder's `.tool()` method takes `impl AgentTool + 'static`
875        // (a concrete value), but oxios' CSpace tools are `Arc<dyn AgentTool>`.
876        // The SDK does not expose a way to inject a pre-built `ToolRegistry`
877        // into the builder, so we register them on the agent's tool registry
878        // after `build()` returns. This keeps CSpace semantics intact.
879        //
880        // We capture the tool names now and apply them once the agent exists.
881        let cspace_tool_arcs: Vec<Arc<dyn oxi_sdk::AgentTool>> = registry
882            .names()
883            .into_iter()
884            .filter_map(|name| registry.get(&name))
885            .collect();
886
887        // Engine-level observability/security → AgentBuilder (new API).
888        if let Some(auth) = engine.authorizer() {
889            builder = builder.authorizer(auth.clone());
890        }
891        if let Some(tracer) = engine.tracer() {
892            builder = builder.tracer(tracer.clone());
893        }
894        if let Some(ct) = engine.cost_tracker() {
895            builder = builder.cost_tracker(ct.clone());
896        }
897
898        // Middleware: AgentBuilder convenience helpers replace the manual
899        // `MiddlewarePipeline` + `build_hooks()` + `set_hooks()` triple.
900        if config.rate_limit_per_minute > 0 {
901            builder = builder.with_rate_limit(config.rate_limit_per_minute);
902        }
903        if config.token_budget > 0 {
904            builder = builder.with_token_budget(config.token_budget);
905        }
906        if config.audit_tool_calls {
907            builder = builder.with_logging();
908        }
909
910        let built = builder.build()?;
911        let agent = Arc::new(built);
912
913        // Attach CSpace tools to the agent's tool registry.
914        // `Agent::tools()` returns the same `Arc<ToolRegistry>` that
915        // `AgentBuilder` populated, so `register_arc` is the canonical
916        // extension point for `Arc<dyn AgentTool>` values.
917        let agent_tools = agent.tools();
918        for tool in cspace_tool_arcs {
919            agent_tools.register_arc(tool);
920        }
921
922        agent
923    };
924
925    // Shared mutable state for the event callback.
926    let exec_state = Arc::new(Mutex::new(ExecuteState::default()));
927    let exec_state_cb = Arc::clone(&exec_state);
928    let memory_for_callback: Arc<MemoryManager> = (*kernel_handle.agents.memory_manager()).clone();
929    let session_id_for_callback = seed_id.to_string();
930    let model_id_for_callback = config.model_id.clone();
931    let agent_id_for_callback = agent_id.to_string();
932    let routing_stats_for_cb = routing_stats.clone();
933    // RFC-015: real-time event publishing for chat transparency.
934    // Falls back to None when the caller did not opt in.
935    let transparency_session: Option<String> = session_id.clone();
936    let kernel_handle_for_cb: Arc<KernelHandle> = Arc::clone(&kernel_handle);
937
938    // Run the agent with streaming events.
939    let result = agent
940        .run_streaming(prompt, move |event| {
941            let mut s = exec_state_cb.lock();
942            match event {
943                AgentEvent::ToolExecutionStart {
944                    tool_name,
945                    tool_call_id,
946                    args,
947                    context,
948                    ..
949                } => {
950                    // Record start time and push a placeholder step.
951                    let idx = s.trajectory_steps.len();
952                    s.pending_tools
953                        .insert(tool_call_id.clone(), (std::time::Instant::now(), idx));
954                    s.tool_args_map.insert(
955                        tool_call_id.clone(),
956                        serde_json::to_string(&args).unwrap_or_default(),
957                    );
958                    s.tool_timestamps
959                        .insert(tool_call_id.clone(), chrono::Utc::now());
960                    s.tool_call_ids.push(tool_call_id.clone());
961                    s.trajectory_steps
962                        .push(oxios_memory::memory::sona::TrajectoryStep {
963                            input: tool_name.clone(),
964                            output: String::new(),
965                            duration_ms: 0,
966                            confidence: 0.0,
967                        });
968                    // RFC-015: broadcast tool start so Web UI can show progress.
969                    if let Some(ref sid) = transparency_session {
970                        let context_json = context
971                            .as_ref()
972                            .map(serde_json::to_value)
973                            .transpose()
974                            .unwrap_or(None);
975                        let _ =
976                            kernel_handle_for_cb
977                                .infra
978                                .publish(KernelEvent::ToolExecutionStarted {
979                                    session_id: sid.clone(),
980                                    tool_name: tool_name.clone(),
981                                    tool_call_id: tool_call_id.clone(),
982                                    tool_args: args.clone(),
983                                    context: context_json,
984                                });
985                    }
986                }
987                AgentEvent::ToolExecutionUpdate {
988                    tool_call_id,
989                    tool_name,
990                    partial_result,
991                    tab_id,
992                    context,
993                } => {
994                    // RFC-015: forward real-time progress to the event bus
995                    // so the Web UI can show a spinner and progress text
996                    // while the tool is still executing. Best-effort —
997                    // publish failures (e.g. lagged subscribers) are ignored.
998                    //
999                    // `tab_id` and `context` come from oxi-agent 0.29+
1000                    // (ToolCallContext: PageVisit, WebSearch, etc.).
1001                    // Older agent versions won't send these — they default
1002                    // to None and the UI gracefully ignores them.
1003                    if let Some(ref sid) = transparency_session {
1004                        let context_json = context
1005                            .as_ref()
1006                            .map(serde_json::to_value)
1007                            .transpose()
1008                            .unwrap_or(None);
1009                        let _ = kernel_handle_for_cb.infra.publish(
1010                            KernelEvent::ToolExecutionProgress {
1011                                session_id: sid.clone(),
1012                                tool_call_id: tool_call_id.clone(),
1013                                tool_name: tool_name.clone(),
1014                                progress: partial_result,
1015                                tab_id,
1016                                context: context_json,
1017                            },
1018                        );
1019                    }
1020                }
1021                AgentEvent::ToolExecutionEnd {
1022                    tool_name,
1023                    tool_call_id,
1024                    is_error,
1025                    result,
1026                    ..
1027                } => {
1028                    if !is_error {
1029                        s.steps_completed += 1;
1030                    }
1031                    // Look up the exact step by tool_call_id.
1032                    let mut duration_ms: u64 = 0;
1033                    let mut summary = String::new();
1034                    if let Some((start, idx)) = s.pending_tools.remove(tool_call_id.as_str()) {
1035                        duration_ms = start.elapsed().as_millis() as u64;
1036                        if let Some(step) = s.trajectory_steps.get_mut(idx) {
1037                            summary = summarize_tool_result(&result.content, 200);
1038                            step.output = summary.clone();
1039                            step.duration_ms = duration_ms;
1040                            step.confidence = if is_error { 0.3 } else { 0.8 };
1041                        }
1042                    }
1043                    s.tool_error_map.insert(tool_call_id.clone(), is_error);
1044                    // RFC-015: broadcast tool completion.
1045                    if let Some(ref sid) = transparency_session {
1046                        let _ = kernel_handle_for_cb.infra.publish(
1047                            KernelEvent::ToolExecutionFinished {
1048                                session_id: sid.clone(),
1049                                tool_call_id: tool_call_id.clone(),
1050                                tool_name: tool_name.clone(),
1051                                duration_ms,
1052                                is_error,
1053                                output_summary: summary,
1054                            },
1055                        );
1056                    }
1057                }
1058                AgentEvent::AgentEnd {
1059                    messages,
1060                    stop_reason,
1061                    ..
1062                } => {
1063                    if let Some(oxi_sdk::Message::Assistant(a)) = messages.last() {
1064                        s.final_content = a.text_content();
1065                    }
1066                    // oxi 0.32.0: loop exits naturally when LLM produces text-only
1067                    // response (StopReason::Stop). Error/Aborted = failure.
1068                    // ToolUse should not occur at AgentEnd in 0.32.0 (the loop
1069                    // continues until text-only), but treat it as non-failure
1070                    // since tool calls were executed successfully.
1071                    s.success = matches!(stop_reason.as_deref(), Some("Stop") | Some("ToolUse"));
1072                }
1073                AgentEvent::Error { message, .. } => {
1074                    s.final_content = message.clone();
1075                    s.success = false;
1076                }
1077                AgentEvent::Usage {
1078                    input_tokens,
1079                    output_tokens,
1080                } => {
1081                    // Accumulate totals for ExecutionResult.
1082                    s.total_input_tokens += input_tokens as u64;
1083                    s.total_output_tokens += output_tokens as u64;
1084
1085                    // Record token usage to cost tracker (existing).
1086                    let agent_label = format!("agent-{agent_id_for_callback}");
1087                    crate::observability::cost_tracker().record(
1088                        &agent_label,
1089                        &oxi_sdk::Model::new(
1090                            &model_id_for_callback,
1091                            &model_id_for_callback,
1092                            oxi_sdk::Api::OpenAiCompletions,
1093                            "unknown",
1094                            "https://unknown.com",
1095                        ),
1096                        oxi_sdk::TokenUsage {
1097                            input: input_tokens as u64,
1098                            output: output_tokens as u64,
1099                            cache_read: 0,
1100                            cache_write: 0,
1101                        },
1102                    );
1103
1104                    // Record to routing stats (RFC-011).
1105                    if let Some(stats) = &routing_stats_for_cb {
1106                        let cost = crate::kernel_handle::engine_api::estimate_cost(
1107                            &model_id_for_callback,
1108                            input_tokens as u64,
1109                            output_tokens as u64,
1110                        );
1111                        stats.record_model_usage(&model_id_for_callback, cost);
1112                    }
1113                    // RFC-015: publish cumulative token usage.
1114                    if let Some(ref sid) = transparency_session {
1115                        let _ = kernel_handle_for_cb
1116                            .infra
1117                            .publish(KernelEvent::TokenUsageUpdate {
1118                                session_id: sid.clone(),
1119                                input_tokens: input_tokens as u64,
1120                                output_tokens: output_tokens as u64,
1121                            });
1122                    }
1123                }
1124                AgentEvent::Compaction {
1125                    event: CompactionEvent::Completed { result, .. },
1126                } => {
1127                    handle_compaction(
1128                        result.summary.clone(),
1129                        session_id_for_callback.clone(),
1130                        memory_for_callback.clone(),
1131                    );
1132                    // RFC-015: compaction is a form of reasoning — expose it.
1133                    if let Some(ref sid) = transparency_session {
1134                        let _ =
1135                            kernel_handle_for_cb
1136                                .infra
1137                                .publish(KernelEvent::ReasoningFragment {
1138                                    session_id: sid.clone(),
1139                                    content: result.summary.clone(),
1140                                    source: "compaction".to_string(),
1141                                });
1142                    }
1143                }
1144                _ => {}
1145            }
1146        })
1147        .await;
1148
1149    // Record circuit breaker result after agent execution.
1150    let circuit = get_llm_circuit_breaker();
1151    if result.is_err() {
1152        circuit.record_failure();
1153        crate::metrics::get_metrics()
1154            .llm_circuit_breaker_state
1155            .set(1.0);
1156    } else {
1157        circuit.record_success();
1158        crate::metrics::get_metrics()
1159            .llm_circuit_breaker_state
1160            .set(0.0);
1161    }
1162
1163    if let Err(e) = result {
1164        tracing::error!(seed_id = %seed_id, error = %e, "Agent failed");
1165        let s = exec_state.lock();
1166        return Ok((
1167            format!("Agent failed: {e}"),
1168            s.steps_completed,
1169            false,
1170            s.trajectory_steps.clone(),
1171            agent,
1172            s.tool_call_ids.clone(),
1173            s.tool_args_map.clone(),
1174            s.tool_error_map.clone(),
1175            s.tool_timestamps.clone(),
1176            s.total_input_tokens,
1177            s.total_output_tokens,
1178        ));
1179    }
1180
1181    let s = exec_state.lock();
1182    tracing::info!(
1183        seed_id = %seed_id,
1184        steps = s.steps_completed,
1185        success = s.success,
1186        "Agent completed"
1187    );
1188
1189    // Record trajectory to SONA learning engine (RFC-020 Phase 2).
1190    // Fire-and-forget: don't block the result on learning.
1191    if !s.trajectory_steps.is_empty()
1192        && let Some(sona) = kernel_handle.agents.memory_manager().sona_engine()
1193    {
1194        let steps = s.trajectory_steps.clone();
1195        let success = s.success;
1196        let sona = Arc::clone(sona);
1197        let domain = infer_domain(&seed_goal);
1198        tokio::spawn(async move {
1199            let verdict = if success {
1200                oxios_memory::memory::sona::Verdict::Success
1201            } else {
1202                oxios_memory::memory::sona::Verdict::Failure
1203            };
1204            let trajectory = oxios_memory::memory::sona::Trajectory::new(steps, verdict, &domain);
1205            if let Err(e) = sona.record(trajectory).await {
1206                tracing::debug!(error = %e, "SONA trajectory recording failed (non-fatal)");
1207            }
1208        });
1209    }
1210
1211    Ok((
1212        s.final_content.clone(),
1213        s.steps_completed,
1214        s.success,
1215        s.trajectory_steps.clone(),
1216        agent,
1217        s.tool_call_ids.clone(),
1218        s.tool_args_map.clone(),
1219        s.tool_error_map.clone(),
1220        s.tool_timestamps.clone(),
1221        s.total_input_tokens,
1222        s.total_output_tokens,
1223    ))
1224}
1225
/// Summarize a tool result string so it fits within `max_len` characters.
///
/// The input is trimmed first. If the trimmed text already fits, it is
/// returned unchanged. Otherwise only its first line is considered: the line
/// is returned as-is when it fits, or cut and suffixed with `...` (the
/// ellipsis counts toward `max_len`) when it does not.
///
/// When `max_len` leaves no room for text plus an ellipsis (`max_len <= 3`),
/// the line is hard-cut to `max_len` characters *without* an ellipsis, so the
/// result never exceeds `max_len`.
///
/// Lengths are measured in `char`s rather than bytes, so multi-byte UTF-8
/// (e.g., Korean, CJK, emoji) is never split mid-character.
fn summarize_tool_result(result: &str, max_len: usize) -> String {
    // True when `s` holds at most `limit` chars. `nth` stops scanning after
    // `limit + 1` chars, so a very large tool result is not walked in full
    // just to learn that it is too long.
    fn fits(s: &str, limit: usize) -> bool {
        s.chars().nth(limit).is_none()
    }

    let trimmed = result.trim();
    if fits(trimmed, max_len) {
        return trimmed.to_string();
    }

    // Too long overall: fall back to the first line only.
    let first_line = trimmed.lines().next().unwrap_or("");
    if fits(first_line, max_len) {
        return first_line.to_string();
    }

    // Reserve three chars for the ellipsis; `saturating_sub` avoids underflow
    // for tiny limits.
    let keep = max_len.saturating_sub(3);
    if keep == 0 {
        // No room for an ellipsis: return as many chars as fit.
        return first_line.chars().take(max_len).collect();
    }

    let mut summary: String = first_line.chars().take(keep).collect();
    summary.push_str("...");
    summary
}
/// Truncate a JSON string to at most `max_len` characters.
///
/// Strings that already fit are returned unchanged. Longer strings are cut
/// and suffixed with `...`, with the ellipsis counting toward `max_len`.
/// When `max_len` leaves no room for text plus an ellipsis (`max_len <= 3`),
/// the string is hard-cut to `max_len` characters instead.
///
/// Both the fit check and the cut are measured in `char`s. Checking the byte
/// length while cutting by chars would append an ellipsis to multi-byte text
/// that was never actually shortened.
fn truncate_json_str(json_str: &str, max_len: usize) -> String {
    // `nth(max_len)` is `None` exactly when the string has at most `max_len`
    // chars, and it stops scanning as soon as the limit is exceeded.
    if json_str.chars().nth(max_len).is_none() {
        return json_str.to_string();
    }
    // Saturating sub avoids underflow when max_len < 3; if there isn't room
    // for an ellipsis, return as many chars as fit.
    let take = max_len.saturating_sub(3);
    if take == 0 {
        return json_str.chars().take(max_len).collect();
    }
    let mut truncated: String = json_str.chars().take(take).collect();
    truncated.push_str("...");
    truncated
}
1262
/// Infer a domain category from a seed goal for SONA trajectory grouping.
///
/// The goal is lower-cased and its first eight whitespace-separated words are
/// compared against a fixed keyword table. Table order is the priority order:
/// the first domain with a matching keyword wins. Punctuation attached to a
/// word (`"Fix:"`, `"deploy,"`) is stripped before the comparison so it does
/// not hide a keyword.
///
/// When no keyword matches, the first two words longer than two bytes are
/// joined with `_` to form the label. If fewer than two such words exist the
/// result is `"general"`.
fn infer_domain(goal: &str) -> String {
    // (domain label, keywords), in priority order. Every keyword is a single
    // word because goals are split on whitespace before matching; a
    // multi-word entry such as "unit test" could never match and is already
    // covered by "test" / "tests".
    const DOMAIN_KEYWORDS: &[(&str, &[&str])] = &[
        (
            "testing",
            &["test", "tests", "spec", "testing", "assert", "integration"],
        ),
        ("deployment", &["deploy", "release", "publish", "ship"]),
        ("bugfix", &["fix", "bug", "patch", "repair", "debug"]),
        (
            "refactoring",
            &["refactor", "restructure", "reorganize", "rewrite"],
        ),
        (
            "documentation",
            &["doc", "document", "readme", "guide", "explain"],
        ),
        (
            "development",
            &["build", "create", "implement", "add", "make", "new"],
        ),
        (
            "analysis",
            &["analyze", "review", "audit", "inspect", "check"],
        ),
        (
            "configuration",
            &["config", "setup", "install", "configure", "init"],
        ),
    ];

    let lower = goal.to_lowercase();

    // Only the leading words are inspected. Surrounding punctuation is
    // trimmed so "fix:" and "(deploy)" still count as keywords.
    let tokens: Vec<&str> = lower
        .split_whitespace()
        .take(8)
        .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()))
        .collect();

    for &(domain, keywords) in DOMAIN_KEYWORDS {
        if tokens.iter().any(|token| keywords.contains(token)) {
            return domain.to_string();
        }
    }

    // Fallback: first 2 meaningful words. This deliberately uses the raw
    // (untrimmed) words so labels produced for unrecognized goals stay the
    // same as before.
    let mut meaningful = lower.split_whitespace().filter(|word| word.len() > 2);
    match (meaningful.next(), meaningful.next()) {
        (Some(first), Some(second)) => format!("{first}_{second}"),
        _ => "general".to_string(),
    }
}
1341
1342/// Handle compaction completion by storing the summary as a Warm memory.
1343///
1344/// Extracts the compaction summary from the event and spawns a background
1345/// task to persist it via MemoryManager. This replaces the inline 30-line
1346/// block that was previously in the event callback.
1347fn handle_compaction(summary: String, session_id: String, memory_manager: Arc<MemoryManager>) {
1348    let entry = MemoryEntry {
1349        id: uuid::Uuid::new_v4().to_string(),
1350        memory_type: MemoryType::Conversation,
1351        tier: crate::memory::MemoryTier::Warm,
1352        content: summary,
1353        content_hash: 0,
1354        source: "compaction".to_string(),
1355        session_id: Some(session_id),
1356        tags: vec![],
1357        importance: 0.5,
1358        pinned: false,
1359        protection: crate::memory::ProtectionLevel::None,
1360        auto_classified: false,
1361        session_appearances: 0,
1362        user_corrected: false,
1363        seen_in_sessions: vec![],
1364        created_at: chrono::Utc::now(),
1365        accessed_at: chrono::Utc::now(),
1366        modified_at: chrono::Utc::now(),
1367        access_count: 0,
1368        decay_score: 1.0,
1369        compaction_level: 0,
1370        compacted_from: vec![],
1371        related_ids: vec![],
1372        contradicts: None,
1373    };
1374    tokio::spawn(async move {
1375        if let Err(e) = memory_manager.remember(entry).await {
1376            tracing::warn!(error = %e, "Failed to save compaction summary");
1377        }
1378    });
1379}
1380
1381/// Build a system prompt from the Seed's goal, constraints, persona,
1382/// and optionally a capability index and kernel manifest.
1383///
1384/// Note: SKILL.md content is no longer injected here. Capabilities are
1385/// surfaced through the CSpace tool set + semantic retrieval instead.
1386fn build_system_prompt(
1387    seed: &Seed,
1388    persona_prompt: Option<&str>,
1389    capabilities_xml: Option<&str>,
1390    kernel_manifest: Option<&str>,
1391    workspace_context: Option<&str>,
1392) -> String {
1393    let mut prompt = String::from(
1394        "You are an autonomous agent in the Oxios operating system.\n\
1395         You execute Seeds — immutable specifications with goals, constraints, and\n\
1396         acceptance criteria.\n\n\
1397         ## Available Tools\n\
1398         You have the following tools:\n\
1399         - **File tools**: read, write, edit files; grep, find, ls for searching\n\
1400         - **Web tools**: web_search for searching the web, get_search_results for retrieving cached results\n\
1401         - **Exec**: run shell commands\n\
1402         - **Memory tools**: memory_read, memory_write, memory_search — agent's internal recall\n\
1403         - **Knowledge**: knowledge — personal markdown vault for documents and notes\n\
1404         - **Kernel tools**: agent, project, persona, cron, security, budget, resource\n\n\
1405         **Important**: When the task involves fetching information from the internet,\n\
1406         websites, or online services, use `web_search` first — do NOT search local files.\n\
1407         When the task asks to \"get\", \"fetch\", \"find online\", or \"look up\" something\n\
1408         from the web, use `web_search`.\n",
1409    );
1410    prompt.push_str(&format!("\n## Goal\n{}\n", seed.goal));
1411
1412    // Preserve user's original wording so the agent sees exact language,
1413    // filenames, and nuances that may have been abstracted in the goal.
1414    if !seed.original_request.is_empty() && seed.original_request != seed.goal {
1415        prompt.push_str(&format!(
1416            "\n## User's Original Request\n{}\n",
1417            seed.original_request
1418        ));
1419    }
1420
1421    if !seed.constraints.is_empty() {
1422        prompt.push_str("\n## Constraints\n");
1423        for (i, c) in seed.constraints.iter().enumerate() {
1424            prompt.push_str(&format!("{}. {}\n", i + 1, c));
1425        }
1426    }
1427
1428    if !seed.acceptance_criteria.is_empty() {
1429        prompt.push_str("\n## Acceptance Criteria\n");
1430        for (i, c) in seed.acceptance_criteria.iter().enumerate() {
1431            prompt.push_str(&format!("{}. {}\n", i + 1, c));
1432        }
1433    }
1434
1435    // ── Workspace Context (RFC-025) ──
1436    // Inject active Mounts + project instructions AFTER the goal/constraints
1437    // and BEFORE the persona, so the agent sees its workspace before it acts.
1438    if let Some(ctx) = workspace_context.filter(|s| !s.trim().is_empty()) {
1439        prompt.push_str("\n## Workspace Context\n");
1440        prompt.push_str(ctx);
1441        prompt.push('\n');
1442    }
1443
1444    if !seed.ontology.is_empty() {
1445        prompt.push_str("\n## Domain Entities\n");
1446        for e in &seed.ontology {
1447            prompt.push_str(&format!(
1448                "- **{}** ({}): {}\n",
1449                e.name, e.entity_type, e.description
1450            ));
1451        }
1452    }
1453
1454    // Inject persona system prompt
1455    if let Some(pp) = persona_prompt {
1456        prompt.push_str("\n## Persona\n");
1457        prompt.push_str(pp);
1458        prompt.push('\n');
1459    }
1460
1461    // Inject semantic capability index (from ToolRetriever)
1462    if let Some(xml) = capabilities_xml {
1463        prompt.push_str("\n## Available Capabilities\n");
1464        prompt.push_str("The following capabilities are relevant to your goal. ");
1465        prompt.push_str("Use the `read` tool to load SKILL.md for any program.\n\n");
1466        prompt.push_str(xml);
1467        prompt.push('\n');
1468    }
1469
1470    // Inject kernel manifest (from CSpace)
1471    if let Some(manifest) = kernel_manifest {
1472        prompt.push('\n');
1473        prompt.push_str(manifest);
1474        prompt.push('\n');
1475    }
1476
1477    // Execution environment guidance
1478    prompt.push_str(
1479        "\n## Execution Protocol\n\
1480         1. UNDERSTAND — Read the Seed completely before acting.\n\
1481         2. PLAN — Determine the minimal set of actions needed.\n\
1482         3. EXECUTE — Use tools to accomplish the goal. Prefer the simplest approach.\n\
1483         4. VERIFY — After each action, check the result: created a file? read it back.\n\
1484         5. REPORT — Summarize how each acceptance criterion was met, with evidence.\n\n\
1485         ## Hard Boundaries\n\
1486         - NEVER modify files outside the workspace scope\n\
1487         - NEVER execute destructive commands without confirming scope\n\
1488         - NEVER claim completion without evidence — show the output, not your opinion\n\
1489         - NEVER add features or improvements beyond the Seed scope\n\
1490         - If you cannot complete the Seed, say so and explain WHY\n\n\
1491         ## Scope Guard\n\
1492         The Seed defines your universe. Do not:\n\
1493         - Refactor code the Seed didn't mention\n\
1494         - Add tests the Seed didn't require\n\
1495         - Change configuration the Seed didn't specify\n\
1496         - \"Improve\" anything beyond what the acceptance criteria demand\n\n\
1497         ## Error Handling\n\
1498         - If a tool fails, read the error message carefully before retrying\n\
1499         - If a command fails, do NOT immediately retry with --force or sudo\n\
1500         - If stuck after 3 attempts, report the blocker rather than continuing to fail\n\n\
1501         ## Shape Matching\n\
1502         Match your output to the task: simple task → concise response.\n\
1503         Do not write 50 lines when 5 would do.\n\
1504         Use `exec` for all command execution (git, gh, osascript, etc.).",
1505    );
1506
1507    prompt
1508}
1509
1510/// Build the user prompt from the seed.
1511fn build_user_prompt(seed: &Seed) -> String {
1512    format!(
1513        "Execute the following goal:\n\n{}\n\nAcceptance criteria:\n{}",
1514        seed.goal,
1515        seed.acceptance_criteria
1516            .iter()
1517            .enumerate()
1518            .map(|(i, c)| format!("{}. {}", i + 1, c))
1519            .collect::<Vec<_>>()
1520            .join("\n")
1521    )
1522}
1523
1524impl std::fmt::Debug for AgentRuntime {
1525    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1526        f.debug_struct("AgentRuntime")
1527            .field("model_id", &self.engine_handle.get().default_model_id())
1528            .finish()
1529    }
1530}
1531
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use oxi_sdk::{AgentTool, ToolContext, ToolError};
    use oxios_ouroboros::Entity;
    use serde_json::Value;

    /// A test tool that does nothing — used to populate the registry.
    struct DummyTool {
        name: String,
    }

    #[async_trait]
    impl AgentTool for DummyTool {
        fn name(&self) -> &str {
            &self.name
        }
        fn label(&self) -> &str {
            &self.name
        }
        fn description(&self) -> &str {
            "Test tool"
        }
        fn parameters_schema(&self) -> Value {
            serde_json::json!({"type": "object"})
        }

        async fn execute(
            &self,
            _tool_call_id: &str,
            _params: Value,
            _shutdown: Option<tokio::sync::oneshot::Receiver<()>>,
            _ctx: &ToolContext,
        ) -> Result<oxi_sdk::AgentToolResult, ToolError> {
            Ok(oxi_sdk::AgentToolResult::success("ok"))
        }
    }

    /// Build a `Seed` that carries only `goal`: every list is empty and every
    /// optional field is `None`.
    ///
    /// Tests override just the fields they exercise with struct-update
    /// syntax, so the full field list is spelled out in one place only.
    fn seed_with_goal(goal: &'static str) -> Seed {
        Seed {
            id: uuid::Uuid::new_v4(),
            goal: goal.into(),
            constraints: vec![],
            acceptance_criteria: vec![],
            ontology: vec![],
            created_at: chrono::Utc::now(),
            generation: 0,
            parent_seed_id: None,
            cspace_hint: None,
            original_request: String::new(),
            output_schema: None,
            project_id: None,
            workspace_context: None,
            mount_paths: Vec::new(),
        }
    }

    /// Test that requires_tools validation passes when all tools are present.
    #[test]
    fn test_requires_tools_validation_passes() {
        let registry = ToolRegistry::new();

        registry.register(DummyTool {
            name: "read".into(),
        });
        registry.register(DummyTool {
            name: "exec".into(),
        });

        let missing = registry.missing(&["read", "exec"]);

        assert!(
            missing.is_empty(),
            "Expected no missing tools, got: {:?}",
            missing
        );
    }

    /// Test that requires_tools validation fails when a tool is missing.
    #[test]
    fn test_requires_tools_validation_fails() {
        let registry = ToolRegistry::new();

        registry.register(DummyTool {
            name: "read".into(),
        });

        let missing = registry.missing(&["read", "exec", "nonexistent"]);

        assert_eq!(missing, vec!["exec", "nonexistent"]);
    }

    /// A fully populated seed must surface its goal, constraints, acceptance
    /// criteria and ontology entities in the system prompt.
    #[test]
    fn test_build_system_prompt_includes_goal() {
        let seed = Seed {
            constraints: vec!["Must use Rust".into()],
            acceptance_criteria: vec!["Server responds to requests".into()],
            ontology: vec![Entity {
                name: "HttpServer".into(),
                entity_type: "struct".into(),
                description: "The main server struct".into(),
            }],
            ..seed_with_goal("Build a web server")
        };

        let prompt = build_system_prompt(&seed, None, None, None, None);

        assert!(prompt.contains("Build a web server"));
        assert!(prompt.contains("Must use Rust"));
        assert!(prompt.contains("Server responds to requests"));
        assert!(prompt.contains("HttpServer"));
        // A bare `contains("struct")` proves nothing: the static Hard
        // Boundaries text always contains "destructive". Match the whole
        // rendered entity line so the entity type and description are
        // actually checked.
        assert!(prompt.contains("- **HttpServer** (struct): The main server struct"));
    }

    /// A seed with only a goal must still produce a prompt containing it.
    #[test]
    fn test_build_system_prompt_empty() {
        let seed = seed_with_goal("Test goal");

        let prompt = build_system_prompt(&seed, None, None, None, None);

        assert!(prompt.contains("Test goal"));
    }

    #[test]
    fn test_infer_domain_testing() {
        assert_eq!(infer_domain("run all unit tests for the kernel"), "testing");
    }

    #[test]
    fn test_infer_domain_deployment() {
        assert_eq!(
            infer_domain("deploy the web service to production"),
            "deployment"
        );
    }

    #[test]
    fn test_infer_domain_bugfix() {
        assert_eq!(infer_domain("fix the null pointer error in main"), "bugfix");
    }

    #[test]
    fn test_infer_domain_development() {
        assert_eq!(
            infer_domain("create a new REST API endpoint"),
            "development"
        );
    }

    #[test]
    fn test_infer_domain_analysis() {
        assert_eq!(
            infer_domain("review the code for security issues"),
            "analysis"
        );
    }

    #[test]
    fn test_infer_domain_fallback() {
        let domain = infer_domain("optimize performance metrics");
        // Intended to exercise the fallback path (described as "first 2
        // meaningful words"). Only non-emptiness is asserted, so the exact
        // fallback value is not pinned by this test.
        assert!(!domain.is_empty());
    }
}
oxios_kernel/agent_runtime.rs

oxios_kernel/
agent_runtime.rs