oxios_kernel/
agent_runtime.rs

1//! Agent runtime: wraps oxi-sdk's Agent for Seed execution.
2//!
3//! The AgentRuntime uses `OxiosEngine.oxi().agent()` (AgentBuilder pattern)
4//! to construct agents with full middleware, observability, and security
5//! integration from oxi-sdk 0.24.0.
6//!
7//! # Architecture
8//!
9//! All tool access goes through `KernelHandle` — the single syscall-table-like
10//! path for agent OS control. The runtime:
11//!
12//! 1. Resolves the agent's CSpace from persona/role/hint
13//! 2. Registers tools via `register_tools_from_cspace()`
14//! 3. Optionally queries `ToolRetriever` for semantic capability hints
15//! 4. Builds an `Agent` via `AgentBuilder` with middleware pipeline
16//! 5. Runs via `Agent::run_streaming()` for real-time event processing
17//!
18//! # oxi-sdk Integration
19//!
20//! Uses `AgentBuilder` for agent construction with:
21//! - `.with_rate_limit()` — tool call rate limiting
22//! - `.with_token_budget()` — per-execution token caps
23//! - `.tracer()` / `.cost_tracker()` — observability hooks
24//! ## Routing integration (RFC-011)
25//!
26//! Model usage events (`AgentEvent::Usage`) are recorded to the shared
27//! `RoutingStats` so the Web dashboard can display per-model call counts
28//! and estimated costs.
29
30use anyhow::Result;
31use oxi_sdk::observability::AuditTrail;
32use oxi_sdk::{
33    Agent, AgentConfig, AgentEvent, CompactionEvent, CompactionStrategy, ProviderResolver,
34};
35use oxi_sdk::{SearchCache, ToolExecutionMode, ToolRegistry};
36use parking_lot::Mutex;
37use std::collections::HashMap;
38use std::sync::Arc;
39// RFC-014 Phase D: `ToolRegistry::register_arc` is used in the AgentBuilder
40// path to attach CSpace tools after `builder.build()` returns.
41
42use crate::access_manager::{AccessGate, AgentContext, TracingAuditSink, TrailAuditSink};
43use crate::capability::resolve::resolve_cspace;
44use crate::engine::OxiosEngine;
45use crate::memory::{MemoryEntry, MemoryManager, MemoryType};
46use crate::persona::PersonaManager;
47use crate::tools::registration::register_tools_from_cspace_gated;
48
49use crate::KernelHandle;
50use crate::event_bus::KernelEvent;
51use crate::session_context::SessionContext;
52use crate::types::AgentId;
53use oxios_ouroboros::{Directive, Entity, ExecEnv, ExecutionResult, Seed};
54
55/// Global LLM circuit breaker instance — delegates to oxi-sdk's ProviderCircuitBreaker.
56static LLM_CIRCUIT_BREAKER: std::sync::OnceLock<oxi_sdk::ProviderCircuitBreaker> =
57    std::sync::OnceLock::new();
58
59/// Get the global LLM circuit breaker.
60fn get_llm_circuit_breaker() -> &'static oxi_sdk::ProviderCircuitBreaker {
61    LLM_CIRCUIT_BREAKER.get_or_init(|| {
62        oxi_sdk::ProviderCircuitBreaker::new(
63            "global".to_string(),
64            oxi_sdk::CircuitBreakerConfig::default(),
65        )
66    })
67}
68
69/// Configuration for creating AgentRuntime instances.
70#[derive(Debug, Clone)]
71pub struct AgentRuntimeConfig {
72    /// Model ID in `provider/model` format (e.g. `anthropic/claude-sonnet-4-20250514`).
73    pub model_id: String,
74    /// How to execute tool calls within a single turn.
75    pub tool_execution: ToolExecutionMode,
76    /// Whether auto-retry is enabled for retryable LLM errors.
77    pub auto_retry_enabled: bool,
78    /// Bound project paths. AgentRuntime sets CWD to paths[0].
79    pub project_paths: Vec<std::path::PathBuf>,
80    /// Scratch workspace directory for temp files.
81    pub workspace_dir: Option<std::path::PathBuf>,
82    /// API key resolved from CredentialStore at build time.
83    pub api_key: Option<String>,
84    /// Per-provider options for fine-grained control.
85    pub provider_options: Option<oxi_sdk::ProviderOptions>,
86    /// Rate limit for tool calls (requests per minute). 0 = unlimited.
87    pub rate_limit_per_minute: usize,
88    /// Token budget per agent execution. 0 = unlimited.
89    pub token_budget: usize,
90    /// Enable audit logging for all tool executions.
91    pub audit_tool_calls: bool,
92    /// Provider-level RPM for rate-limited provider pool. 0 = no pooling.
93    /// When set, uses `OxiosEngine::pooled_provider()` instead of `create_provider()`.
94    pub provider_rpm: u32,
95}
96
97impl Default for AgentRuntimeConfig {
98    fn default() -> Self {
99        Self {
100            model_id: String::new(),
101            tool_execution: ToolExecutionMode::Parallel,
102            auto_retry_enabled: true,
103            project_paths: Vec::new(),
104            workspace_dir: None,
105            api_key: None,
106            provider_options: None,
107            rate_limit_per_minute: 0,
108            token_budget: 0,
109            audit_tool_calls: false,
110            provider_rpm: 0,
111        }
112    }
113}
114
115/// Mutable state shared between the event callback and the main execute flow.
116#[derive(Default)]
117struct ExecuteState {
118    final_content: String,
119    steps_completed: usize,
120    success: bool,
121    /// Collected trajectory steps for SONA learning (RFC-020 Phase 2).
122    /// Ordered by insertion — parallel tools get their final position
123    /// resolved when they complete, preserving approximate execution order.
124    trajectory_steps: Vec<oxios_memory::memory::sona::TrajectoryStep>,
125    /// Map of tool_call_id → (start instant, index into trajectory_steps).
126    /// Used to correlate ToolExecutionEnd with the correct step when
127    /// parallel tool calls complete out of order.
128    pending_tools: std::collections::HashMap<String, (std::time::Instant, usize)>,
129    /// Ordered tool_call_ids matching trajectory_steps indices.
130    /// Pushed in ToolExecutionStart, same order as trajectory_steps.
131    tool_call_ids: Vec<String>,
132    /// Per-step tool args (JSON string) captured from ToolExecutionStart.
133    tool_args_map: std::collections::HashMap<String, String>,
134    /// Per-step error flag from ToolExecutionEnd.
135    tool_error_map: std::collections::HashMap<String, bool>,
136    /// Per-step start timestamp (UTC) from ToolExecutionStart.
137    tool_timestamps: std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
138    /// Cumulative input tokens from AgentEvent::Usage.
139    total_input_tokens: u64,
140    /// Cumulative output tokens from AgentEvent::Usage.
141    total_output_tokens: u64,
142}
143
144/// Runtime that wraps an oxi-sdk `Agent` for executing Seeds.
145///
146/// Each call to [`AgentRuntime::execute`] creates a fresh `Agent`,
147/// builds a ToolRegistry based on the agent's CSpace, and runs it to completion.
148///
149/// All OS-level access goes through `KernelHandle` — the single syscall table
150/// for agent control. Provider/model resolution goes through `EngineHandle`,
151/// which returns the latest `OxiosEngine` (hot-swapped on config change).
152pub struct AgentRuntime {
153    engine_handle: Arc<crate::engine::EngineHandle>,
154    config: AgentRuntimeConfig,
155    /// Single path to all kernel services.
156    kernel_handle: Arc<KernelHandle>,
157    /// Persona manager for system prompt injection.
158    persona_manager: Option<Arc<PersonaManager>>,
159    /// Semantic tool retriever for capability discovery.
160    tool_retriever: Option<Arc<crate::tools::retrieval::ToolRetriever>>,
161    /// Shared routing stats (shared with EngineApi).
162    routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
163    /// Autonomous persistence hook (RFC-016).
164    persistence_hook: Option<Arc<crate::persistence_hook::PersistenceHook>>,
165    /// Per-session assistant message index counter (RFC-016).
166    session_msg_counter: Arc<Mutex<HashMap<String, usize>>>,
167}
168
169impl AgentRuntime {
170    /// Creates a new agent runtime with engine handle and kernel access.
171    ///
172    /// The active model is resolved live from `engine_handle` on each
173    /// `execute()` (reads the post-hot-swap default) — there is no frozen
174    /// model id at construction. Tool access goes through `kernel_handle`.
175    pub fn new(
176        engine_handle: Arc<crate::engine::EngineHandle>,
177        kernel_handle: Arc<KernelHandle>,
178        routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
179    ) -> Self {
180        Self {
181            engine_handle,
182            config: AgentRuntimeConfig::default(),
183            kernel_handle,
184            persona_manager: None,
185            tool_retriever: None,
186            routing_stats,
187            persistence_hook: None,
188            session_msg_counter: Arc::new(Mutex::new(HashMap::new())),
189        }
190    }
191
192    /// Attach a PersonaManager for persona system prompt injection.
193    pub fn with_persona_manager(mut self, pm: Arc<PersonaManager>) -> Self {
194        self.persona_manager = Some(pm);
195        self
196    }
197
198    /// Set the runtime config (overrides defaults).
199    pub fn with_config(mut self, config: AgentRuntimeConfig) -> Self {
200        self.config = config;
201        self
202    }
203
204    /// Attach a ToolRetriever for semantic capability discovery.
205    pub fn with_tool_retriever(
206        mut self,
207        retriever: Arc<crate::tools::retrieval::ToolRetriever>,
208    ) -> Self {
209        self.tool_retriever = Some(retriever);
210        self
211    }
212
213    /// Attach a PersistenceHook for autonomous persistence (RFC-016).
214    pub fn with_persistence_hook(
215        mut self,
216        hook: Arc<crate::persistence_hook::PersistenceHook>,
217    ) -> Self {
218        self.persistence_hook = Some(hook);
219        self
220    }
221
222    /// Execute a Seed by running the tool-calling agent to completion.
223    ///
224    /// 1. Resolves CSpace from persona/role/hint
225    /// 2. Registers tools via CSpace
226    /// 3. Recalls memories if available
227    /// 4. Creates Agent via `Agent::new_with_resolver()`
228    /// 5. Runs via `Agent::run_streaming()`
229    pub async fn execute(
230        &self,
231        agent_id: AgentId,
232        seed: &Seed,
233        session_ctx: &mut SessionContext,
234    ) -> Result<ExecutionResult> {
235        // RFC-015: session_id is derived from seed.id for chat transparency
236        // event publishing. Most callers run one Seed per session turn, so
237        // seed.id is a usable session identifier.
238        let session_id: Option<String> = Some(seed.id.to_string());
239        self.execute_with_session(agent_id, seed, session_ctx, session_id)
240            .await
241    }
242
243    /// Like [`execute`](Self::execute) but with an explicit session_id for
244    /// RFC-015 chat transparency event publishing.
245    pub async fn execute_with_session(
246        &self,
247        agent_id: AgentId,
248        seed: &Seed,
249        session_ctx: &mut SessionContext,
250        session_id: Option<String>,
251    ) -> Result<ExecutionResult> {
252        self.execute_inner(
253            agent_id,
254            &seed.goal,
255            &seed.original_request,
256            &seed.constraints,
257            &seed.acceptance_criteria,
258            &seed.ontology,
259            seed.cspace_hint.as_deref(),
260            &seed.mount_paths,
261            seed.workspace_context.as_deref(),
262            session_ctx,
263            session_id,
264            Some(seed),
265        )
266        .await
267    }
268
269    /// Execute a Directive with its ExecEnv (RFC-027 unified intent handling).
270    ///
271    /// Maps Directive/ExecEnv fields to the agent's runtime inputs and runs
272    /// the same tool-calling loop as [`execute`](Self::execute). The
273    /// persistence hook (RFC-016) is currently skipped on this path because
274    /// it still expects a `&Seed`; Phase 6 will update it to accept a
275    /// `&Directive`.
276    pub async fn execute_directive(
277        &self,
278        agent_id: AgentId,
279        directive: &Directive,
280        env: &ExecEnv,
281        session_ctx: &mut SessionContext,
282    ) -> Result<ExecutionResult> {
283        // Directive has no stable per-execution ID yet (Phase 6). Derive a
284        // session_id from the agent_id so chat transparency events still
285        // correlate.
286        let session_id: Option<String> = Some(agent_id.to_string());
287        self.execute_directive_with_session(agent_id, directive, env, session_ctx, session_id)
288            .await
289    }
290
291    /// Like [`execute_directive`](Self::execute_directive) but with an
292    /// explicit session_id for RFC-015 chat transparency event publishing.
293    pub async fn execute_directive_with_session(
294        &self,
295        agent_id: AgentId,
296        directive: &Directive,
297        env: &ExecEnv,
298        session_ctx: &mut SessionContext,
299        session_id: Option<String>,
300    ) -> Result<ExecutionResult> {
301        let ontology: &[Entity] = &[];
302        self.execute_inner(
303            agent_id,
304            &directive.goal,
305            &directive.original_request,
306            &directive.constraints,
307            &directive.acceptance_criteria,
308            ontology,
309            env.cspace_hint.as_deref(),
310            &env.mount_paths,
311            env.workspace_context.as_deref(),
312            session_ctx,
313            session_id,
314            None,
315        )
316        .await
317    }
318
319    /// Shared execution body for Seed and Directive paths.
320    ///
321    /// Performs the full agent-runtime pipeline: prompt assembly, capability
322    /// retrieval, memory + knowledge recall, CSpace tool registration,
323    /// model resolution, agent run, post-execution summary, and (Seed path
324    /// only) the autonomous persistence hook. Directive callers pass
325    /// `persistence_seed = None` to skip persistence until Phase 6.
326    #[allow(clippy::too_many_arguments)]
327    async fn execute_inner(
328        &self,
329        agent_id: AgentId,
330        goal: &str,
331        original_request: &str,
332        constraints: &[String],
333        acceptance_criteria: &[String],
334        ontology: &[Entity],
335        cspace_hint: Option<&str>,
336        mount_paths: &[std::path::PathBuf],
337        workspace_context: Option<&str>,
338        session_ctx: &mut SessionContext,
339        session_id: Option<String>,
340        persistence_seed: Option<&Seed>,
341    ) -> Result<ExecutionResult> {
342        let prompt = build_user_prompt_inner(goal, acceptance_criteria);
343
344        // Get active persona system prompt.
345        let persona_prompt = self
346            .persona_manager
347            .as_ref()
348            .map(|pm| pm.active_system_prompt())
349            .filter(|s| !s.trim().is_empty());
350
351        // Determine persona role for CSpace resolution.
352        let persona_role = self
353            .persona_manager
354            .as_ref()
355            .and_then(|pm| pm.get_active_persona().map(|p| p.role.clone()));
356
357        // Resolve CSpace from persona role, hint, or default.
358        let cspace = resolve_cspace(
359            cspace_hint,
360            persona_role.as_deref(),
361            Some("worker"),
362            agent_id,
363        );
364
365        // Build system prompt (without SKILL.md injection — capabilities are
366        // surfaced through the CSpace tool set + semantic retrieval instead).
367        let mut system_prompt = build_system_prompt_inner(
368            goal,
369            original_request,
370            constraints,
371            acceptance_criteria,
372            ontology,
373            workspace_context,
374            persona_prompt.as_deref(),
375            None,
376            None,
377        );
378
379        // Semantic capability retrieval: find tools relevant to this task's goal.
380        let capabilities_xml = if let Some(ref retriever) = self.tool_retriever {
381            match retriever.embedder().embed(goal).await {
382                Ok(query_vec) => {
383                    let results = retriever.retrieve(&query_vec, 8);
384                    if results.is_empty() {
385                        None
386                    } else {
387                        let xml = crate::tools::retrieval::format_capability_index(&results);
388                        tracing::info!(count = results.len(), "Retrieved relevant capabilities");
389                        Some(xml)
390                    }
391                }
392                Err(e) => {
393                    tracing::warn!(error = %e, "Failed to embed goal for retrieval");
394                    None
395                }
396            }
397        } else {
398            None
399        };
400
401        // Build kernel manifest from CSpace active domains.
402        let kernel_manifest = {
403            let domains = cspace.active_domains();
404            if domains.is_empty() {
405                None
406            } else {
407                Some(crate::tools::retrieval::build_kernel_manifest(&domains))
408            }
409        };
410
411        // Rebuild system prompt with capabilities and manifest if available.
412        if capabilities_xml.is_some() || kernel_manifest.is_some() {
413            system_prompt = build_system_prompt_inner(
414                goal,
415                original_request,
416                constraints,
417                acceptance_criteria,
418                ontology,
419                workspace_context,
420                persona_prompt.as_deref(),
421                capabilities_xml.as_deref(),
422                kernel_manifest.as_deref(),
423            );
424        }
425
426        // Blend relevant memories into system prompt.
427        let memory_manager = self.kernel_handle.agents.memory_manager();
428        match memory_manager
429            .recall_with_proactive(goal, &mut session_ctx.recall_timing)
430            .await
431        {
432            Ok(memories) if !memories.is_empty() => {
433                tracing::info!(count = memories.len(), "Recalled memories for task");
434                system_prompt = memory_manager.blend_into_prompt(&memories, &system_prompt);
435            }
436            Ok(_) => tracing::debug!("No memories recalled"),
437            Err(e) => tracing::warn!(error = %e, "Failed to recall memories"),
438        }
439
440        // Inject learned strategy from SONA (RFC-020 Phase 2).
441        if let Some(sona) = memory_manager.sona_engine() {
442            match sona.adapt(goal).await {
443                Ok(Some(pattern)) if pattern.confidence > 0.5 => {
444                    tracing::info!(
445                        domain = %pattern.domain,
446                        confidence = pattern.confidence,
447                        "SONA learned pattern injected"
448                    );
449                    system_prompt.push_str(&format!(
450                        "\n\n## Learned Strategy (confidence: {:.0}%)\n{}\n",
451                        pattern.confidence * 100.0,
452                        pattern.strategy,
453                    ));
454                }
455                Ok(_) => tracing::debug!("No high-confidence SONA pattern found"),
456                Err(e) => tracing::debug!(error = %e, "SONA adapt failed (non-fatal)"),
457            }
458        }
459
460        // Blend relevant knowledge notes into system prompt (KnowledgeLens, RFC-003 Phase 3).
461        match self
462            .kernel_handle
463            .knowledge_lens
464            .recall_for_context(goal, 5)
465            .await
466        {
467            Ok(ctx) if !ctx.notes.is_empty() => {
468                tracing::info!(
469                    notes = ctx.notes.len(),
470                    memories = ctx.memories.len(),
471                    "Recalled knowledge context for task"
472                );
473                let knowledge_blend = ctx
474                    .notes
475                    .iter()
476                    .take(3)
477                    .map(|n| format!("## {}\n\n{}", n.name, n.content))
478                    .collect::<Vec<_>>()
479                    .join("\n\n");
480                system_prompt.push_str("\n\n## Relevant Knowledge\n\n");
481                system_prompt.push_str(&knowledge_blend);
482            }
483            Ok(_) => tracing::debug!("No knowledge recalled"),
484            Err(e) => tracing::warn!(error = %e, "Failed to recall knowledge context"),
485        }
486
487        // Resolve the LIVE default model (post-hot-swap). This is the single
488        // source of truth — the same engine default the OuroborosEngine reads
489        // via the ModelResolver port. Validates fail-fast: a bad model ID set
490        // via the Web UI is rejected here at execute entry, before any tool work.
491        let engine = self.engine_handle.get();
492        let model_id = engine.default_model_id().to_string();
493        engine.resolve_model(&model_id)?;
494        // Synthetic per-execution ID for tracing. Seed path uses seed.id;
495        // Directive path mints a fresh UUID since Directive doesn't carry one.
496        let exec_id = persistence_seed
497            .map(|s| s.id)
498            .unwrap_or_else(uuid::Uuid::new_v4);
499
500        // Build the agent. Refresh config.model_id to the live value so every
501        // downstream consumer (AgentConfig, legacy provider path, usage callback)
502        // uses the same model as the interview/seed phases — no frozen boot
503        // string that silently diverges from what interview used.
504        let mut config = self.config.clone();
505        config.model_id = model_id;
506        let kernel_handle = Arc::clone(&self.kernel_handle);
507
508        // Extract audit trail from kernel for TrailAuditSink wiring.
509        let audit_trail: Option<Arc<AuditTrail>> =
510            Some(Arc::clone(&self.kernel_handle.security.audit_trail));
511
512        let (
513            mut final_content,
514            steps_completed,
515            success,
516            trajectory_steps,
517            agent,
518            tool_call_ids,
519            tool_args_map,
520            tool_error_map,
521            tool_timestamps,
522            total_input_tokens,
523            total_output_tokens,
524        ) = {
525            run_agent(
526                &config,
527                &engine,
528                kernel_handle,
529                system_prompt,
530                prompt,
531                exec_id,
532                goal.to_string(),
533                agent_id,
534                cspace,
535                audit_trail,
536                self.routing_stats.clone(),
537                session_id.clone(),
538                mount_paths,
539            )
540            .await?
541        };
542
543        // ── Post-execution: safety net for empty final content ──
544        //
545        // oxi 0.32.0 removed max_iterations — the loop now exits naturally
546        // when the LLM produces a text-only response (pi-agent behavior).
547        // This block is kept as a safety net in case the LLM returns empty
548        // text despite a natural exit (rare, but possible).
549        if final_content.is_empty() && !trajectory_steps.is_empty() {
550            let tool_summary: Vec<String> = trajectory_steps
551                .iter()
552                .enumerate()
553                .map(|(i, step)| {
554                    let truncated = if step.output.len() > 800 {
555                        // Char-boundary safe truncation: roll back to the
556                        // nearest UTF-8 boundary so multibyte sequences
557                        // (Korean, CJK, emoji) don't panic on byte slicing.
558                        let mut end = 800;
559                        while end > 0 && !step.output.is_char_boundary(end) {
560                            end -= 1;
561                        }
562                        format!("{}...", &step.output[..end])
563                    } else {
564                        step.output.clone()
565                    };
566                    format!("{}. [{}] {}", i + 1, step.input, truncated)
567                })
568                .collect();
569            let summary_prompt = format!(
570                "도구 실행 결과:\n\n{}\n\n\
571                 위 결과를 바탕으로 사용자의 요청에 대해 자연스럽게 한국어로 답변해주세요. \
572                 도구의 원시 출력을 그대로 복사하지 말고, 의미 있는 내용만 정리해서 전달하세요.",
573                tool_summary.join("\n")
574            );
575            match agent.run(summary_prompt).await {
576                Ok((response, _events)) => {
577                    if !response.content.is_empty() {
578                        tracing::info!(exec_id = %exec_id, "Post-execution summary generated");
579                        final_content = response.content;
580                    }
581                }
582                Err(e) => {
583                    tracing::warn!(error = %e, "Post-execution summary failed");
584                }
585            }
586        }
587
588        // Map trajectory steps to tool call records for the execution result.
589        // tool_call_ids[i] corresponds to trajectory_steps[i].
590        let tool_calls: Vec<oxios_ouroboros::ToolCallRecord> = trajectory_steps
591            .iter()
592            .enumerate()
593            .map(|(i, step)| {
594                let tc_id = tool_call_ids.get(i).cloned().unwrap_or_default();
595                let args_str = tool_call_ids
596                    .get(i)
597                    .and_then(|id| tool_args_map.get(id))
598                    .cloned()
599                    .unwrap_or_default();
600                let is_error = tool_call_ids
601                    .get(i)
602                    .and_then(|id| tool_error_map.get(id))
603                    .copied()
604                    .unwrap_or(false);
605                let timestamp = tool_call_ids
606                    .get(i)
607                    .and_then(|id| tool_timestamps.get(id))
608                    .copied();
609                let input_str = truncate_json_str(&args_str, 500);
610                oxios_ouroboros::ToolCallRecord {
611                    tool: step.input.clone(),
612                    input: input_str,
613                    output: step.output.clone(),
614                    duration_ms: step.duration_ms,
615                    is_error,
616                    tool_call_id: tc_id,
617                    timestamp,
618                }
619            })
620            .collect();
621
622        tracing::info!(
623            exec_id = %exec_id,
624            steps = steps_completed,
625            success,
626            tool_calls = tool_calls.len(),
627            "AgentRuntime finished"
628        );
629
630        let result = ExecutionResult {
631            output: final_content.clone(),
632            steps_completed,
633            success,
634            tool_calls,
635            tokens_input: total_input_tokens,
636            tokens_output: total_output_tokens,
637            model_id: self.engine_handle.get().default_model_id().to_string(),
638        };
639
640        // RFC-016: Autonomous persistence hook.
641        // Runs after successful execution, fire-and-forget.
642        // Only available on the Seed path today (persistence_seed is Some);
643        // the Directive path will gain its own hook adapter in Phase 6.
644        if let Some(seed) = persistence_seed
645            && success
646            && let Some(hook) = &self.persistence_hook
647        {
648            let already_saved_knowledge = trajectory_steps
649                .iter()
650                .any(|s| s.input == "knowledge" && s.output.contains("written successfully"));
651            let hook = hook.clone();
652            let seed_clone = seed.clone();
653            let traj_clone = trajectory_steps.clone();
654            let output_clone = final_content.clone();
655            let sid = session_id.clone();
656            // Compute the assistant message index for this execution.
657            // Increment per-session counter, then use the pre-increment value.
658            let msg_index = {
659                let mut counter = self.session_msg_counter.lock();
660                let idx = counter.entry(sid.clone().unwrap_or_default()).or_insert(0);
661                let current = *idx;
662                *idx += 1;
663                current
664            };
665            tokio::spawn(async move {
666                match hook
667                    .evaluate(
668                        &seed_clone,
669                        &traj_clone,
670                        &output_clone,
671                        already_saved_knowledge,
672                    )
673                    .await
674                {
675                    Ok(plan) => {
676                        if !plan.memory.is_empty() || !plan.knowledge.is_empty() {
677                            tracing::info!(
678                                memory = plan.memory.len(),
679                                knowledge = plan.knowledge.len(),
680                                message_index = msg_index,
681                                "PersistenceHook executing plan"
682                            );
683                            let session_id = sid.unwrap_or_default();
684                            hook.execute_plan(plan, &session_id, msg_index).await;
685                        }
686                    }
687                    Err(e) => tracing::warn!(error = %e, "PersistenceHook evaluate failed"),
688                }
689            });
690        }
691
692        Ok(result)
693    }
694}
695
696/// Create and run an oxi-sdk `Agent` with CSpace-based tool registration.
697///
698/// Uses `engine.oxi().agent()` (AgentBuilder) for full middleware,
699/// observability, and security integration from oxi-sdk 0.23.0.
700#[allow(clippy::too_many_arguments)]
701async fn run_agent(
702    config: &AgentRuntimeConfig,
703    engine: &OxiosEngine,
704    kernel_handle: Arc<KernelHandle>,
705    system_prompt: String,
706    prompt: String,
707    seed_id: uuid::Uuid,
708    seed_goal: String,
709    agent_id: AgentId,
710    cspace: crate::capability::CSpace,
711    audit_trail: Option<Arc<AuditTrail>>,
712    routing_stats: Option<Arc<crate::kernel_handle::RoutingStats>>,
713    session_id: Option<String>,
714    mount_paths: &[std::path::PathBuf],
715) -> Result<(
716    String,
717    usize,
718    bool,
719    Vec<oxios_memory::memory::sona::TrajectoryStep>,
720    Arc<Agent>,
721    Vec<String>,
722    std::collections::HashMap<String, String>,
723    std::collections::HashMap<String, bool>,
724    std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
725    u64,
726    u64,
727)> {
728    // Extract workspace.
729    // RFC-025: prefer the primary Mount's first path, then fall back to the
730    // legacy config.project_paths, then workspace_dir, then temp.
731    let workspace = if !mount_paths.is_empty() {
732        mount_paths[0].clone()
733    } else if !config.project_paths.is_empty() {
734        config.project_paths[0].clone()
735    } else if let Some(ref ws) = config.workspace_dir {
736        ws.clone()
737    } else {
738        std::env::temp_dir()
739            .join("oxios-agent-workspace")
740            .join(agent_id.to_string())
741    };
742
743    // Ensure workspace exists.
744    let _ = std::fs::create_dir_all(&workspace);
745
746    tracing::debug!(workspace = %workspace.display(), "Agent workspace scoped");
747
748    // Ensure all paths the agent might access are in allowed_paths.
749    //
750    // AgentLifecycleManager::ensure_permissions() adds kernel.workspace (~/.oxios/workspace),
751    // but the agent operates in different directories depending on context:
752    //
753    //   1. Process CWD — oxi-sdk 0.35+ bakes `workspace_dir` into file tools
754    //      via `with_cwd`, so ReadTool/LsTool resolve relatives against the
755    //      workspace, NOT the process CWD. However, oxios's own CSpace tools
756    //      (kernel-bridge tools wrapped in GatedTool) and bash/exec
757    //      subprocesses may still resolve against the process CWD. We grant
758    //      it as a safety net so those tools aren't denied by GatedTool.
759    //   2. The designated workspace — computed from mount_paths / workspace_dir / temp.
760    //   3. Kernel workspace — state store path for seeds, sessions, etc.
761    //   4. /tmp -- general temp file access.
762    //
763    // All four must be in allowed_paths before GatedTool wraps any tool.
764    {
765        use crate::access_manager::{Role, Subject};
766        let agent_name = format!("agent-{agent_id}");
767        let mut am = kernel_handle.exec.access_manager().lock();
768        let perms = am.get_or_create_permissions(&agent_name);
769
770        // 1. CWD -- critical: oxi-sdk resolves relative paths here
771        if let Ok(cwd) = std::env::current_dir() {
772            let cwd_pattern = format!("{}/**", cwd.to_string_lossy().trim_end_matches('/'));
773            if !perms.allowed_paths.iter().any(|p| p == &cwd_pattern) {
774                perms.allow_path(&cwd_pattern);
775                tracing::debug!(
776                    agent = %agent_name,
777                    path = %cwd_pattern,
778                    "Added CWD to agent allowed paths"
779                );
780            }
781        }
782
783        // 2. Designated workspace
784        let ws_pattern = format!("{}/**", workspace.to_string_lossy().trim_end_matches('/'));
785        if !perms.allowed_paths.iter().any(|p| p == &ws_pattern) {
786            perms.allow_path(&ws_pattern);
787        }
788
789        // 2b. RFC-025: every bound Mount grants path access.
790        //     This fixes the latent gap where only project_paths[0] was
791        //     accessible — now all Mount paths (multi-path work) are allowed.
792        //     Parent patterns already covering a path are skipped.
793        for mount_path in mount_paths {
794            let pattern = format!("{}/**", mount_path.to_string_lossy().trim_end_matches('/'));
795            if !perms.allowed_paths.iter().any(|p| p == &pattern) {
796                perms.allow_path(&pattern);
797                tracing::debug!(
798                    agent = %agent_name,
799                    path = %pattern,
800                    "Added Mount path to agent allowed paths (RFC-025)"
801                );
802            }
803        }
804
805        // 3. Kernel workspace (state store path)
806        let kernel_ws = kernel_handle
807            .state
808            .workspace_path()
809            .to_string_lossy()
810            .to_string();
811        let kernel_ws_pattern = format!("{}/**", kernel_ws.trim_end_matches('/'));
812        if kernel_ws_pattern != ws_pattern
813            && !perms.allowed_paths.iter().any(|p| p == &kernel_ws_pattern)
814        {
815            perms.allow_path(&kernel_ws_pattern);
816        }
817
818        // 4. /tmp -- for general temp file access
819        if !perms.allowed_paths.iter().any(|p| p == "/tmp/**") {
820            perms.allow_path("/tmp/**");
821        }
822
823        // Ensure RBAC Superuser role so AccessGate Layer 1 passes.
824        let rbac_subject = Subject::Agent(agent_id);
825        am.rbac_manager_mut()
826            .assign_role(rbac_subject, Role::Superuser);
827    }
828
829    // Start distributed trace span for this agent execution.
830    let _trace_guard = crate::observability::tracer().start(
831        format!("seed-{}", &seed_id.to_string()[..8]).as_str(),
832        oxi_sdk::SpanKind::Agent,
833    );
834
835    // ── Register tools based on CSpace (with access gate) ──
836    let registry = ToolRegistry::new();
837    let search_cache = Arc::new(SearchCache::new());
838
839    // Build agent context for security
840    let agent_context = AgentContext {
841        agent_id,
842        agent_name: format!("agent-{agent_id}"),
843        cspace: Arc::new(cspace.clone()),
844    };
845
846    // Build audit sink: TrailAuditSink (Merkle chain + JSONL) when audit_trail
847    // is available, otherwise fall back to TracingAuditSink.
848    let audit_sink: Arc<dyn crate::access_manager::AuditSink> = if let Some(trail) = audit_trail {
849        let audit_path = kernel_handle
850            .state
851            .workspace_path()
852            .join("audit")
853            .join("access.jsonl");
854        Arc::new(TrailAuditSink::new(trail, audit_path))
855    } else {
856        Arc::new(TracingAuditSink)
857    };
858
859    // Build access gate from kernel's security infrastructure
860    let access_gate = Arc::new(AccessGate::new(
861        kernel_handle.exec.access_manager().clone(),
862        Arc::new(kernel_handle.exec.config_snapshot()),
863        audit_sink,
864    ));
865
866    register_tools_from_cspace_gated(
867        &registry,
868        &kernel_handle,
869        &cspace,
870        search_cache,
871        agent_id,
872        access_gate,
873        agent_context,
874    );
875
876    tracing::info!(
877        seed_id = %seed_id,
878        capabilities = cspace.len(),
879        "Tools registered from CSpace"
880    );
881
882    // ── Build AgentConfig ──
883    //
884    // RFC-014 Phase D: `system_prompt` is also passed to the new
885    // `AgentBuilder::system_prompt()` (which overrides the value embedded
886    // in `AgentConfig` at build time). We clone here so the builder path
887    // can consume the value while the legacy `Agent::new_with_resolver`
888    // path still sees it in the config.
889    let agent_config = AgentConfig {
890        name: format!("agent-{agent_id}"),
891        description: None,
892        model_id: config.model_id.clone(),
893        system_prompt: Some(system_prompt.clone()),
894        timeout_seconds: 300,
895        temperature: Some(0.7),
896        max_tokens: Some(8192),
897        compaction_strategy: CompactionStrategy::Threshold(0.8),
898        compaction_instruction: None,
899        context_window: 128_000,
900        api_key: config.api_key.clone(),
901        workspace_dir: Some(workspace.clone()),
902        output_mode: None,
903        provider_options: config.provider_options.clone(),
904        // oxi-sdk 0.37.0+: ownership identity for oxi's built-in ownership-gated
905        // tools (e.g. the `issue` tool's flock). `None` preserves the pre-0.37.1
906        // behavior (ToolContext.session_id == None). Oxios runs its own tool
907        // set, so no ownership identity is needed here; set `Some(...)` only if
908        // oxios agents start using oxi ownership-gated tools.
909        session_id: None,
910        ..Default::default()
911    };
912
913    // ── Build Agent (RFC-014 Phase D) ──
914    //
915    // Two paths:
916    //   1. `provider_rpm == 0` (common): use oxi-sdk 0.26.2's new
917    //      `AgentBuilder` API. The builder unifies model resolution, provider
918    //      creation, and (optionally) middleware wiring. Engine-level
919    //      `authorizer` / `tracer` / `cost_tracker` are propagated through
920    //      the new builder methods.
921    //   2. `provider_rpm > 0` (rare): keep the legacy
922    //      `Agent::new_with_resolver` + `set_hooks` path because the
923    //      AgentBuilder does not expose a way to inject a pre-built
924    //      `ProviderPool` for rate-limited access. This is a deliberate
925    //      scope-limit per RFC-014/phase-d-agentbuilder.md §2 "Provider
926    //      선택 로직은 보존".
927    let agent = if config.provider_rpm > 0 {
928        // ── Legacy path: rate-limited provider pool ──
929        let resolver: Arc<dyn ProviderResolver> = Arc::new(engine.oxi().clone());
930        let provider_name = engine.resolve_model(&config.model_id)?.provider;
931        let provider = engine.pooled_provider(&provider_name, config.provider_rpm)?;
932
933        // Build middleware pipeline.
934        let mut pipeline = oxi_sdk::MiddlewarePipeline::new();
935        if config.rate_limit_per_minute > 0 {
936            pipeline = pipeline.push(oxi_sdk::middleware::builtins::RateLimitMiddleware::new(
937                config.rate_limit_per_minute,
938            ));
939        }
940        if config.token_budget > 0 {
941            pipeline = pipeline.push(oxi_sdk::middleware::builtins::TokenBudgetMiddleware::new(
942                config.token_budget,
943            ));
944        }
945        if config.audit_tool_calls {
946            pipeline = pipeline.push(oxi_sdk::middleware::builtins::LoggingMiddleware::new(
947                tracing::Level::INFO,
948            ));
949        }
950
951        // Create Agent with CSpace tool registry and provider resolver.
952        let agent = Arc::new(Agent::new_with_resolver(
953            provider,
954            agent_config,
955            Arc::new(registry),
956            resolver,
957        ));
958
959        // Wire middleware pipeline → AgentHooks.
960        if !pipeline.is_empty() {
961            let terminate_flag = Arc::new(std::sync::atomic::AtomicBool::new(false));
962            let agent_id_for_hooks = agent_id.to_string();
963            let hooks = oxi_sdk::middleware::build_hooks(
964                Arc::new(pipeline),
965                agent_id_for_hooks,
966                terminate_flag,
967            );
968            agent.set_hooks(hooks);
969        }
970
971        agent
972    } else {
973        // ── New path: AgentBuilder (RFC-014 Phase D) ──
974        let mut builder = engine
975            .oxi()
976            .agent(agent_config)
977            .workspace(&workspace)
978            .system_prompt(system_prompt);
979
980        // CSpace-based tool registration is oxios-specific and is preserved.
981        //
982        // The builder's `.tool()` method takes `impl AgentTool + 'static`
983        // (a concrete value), but oxios' CSpace tools are `Arc<dyn AgentTool>`.
984        // The SDK does not expose a way to inject a pre-built `ToolRegistry`
985        // into the builder, so we register them on the agent's tool registry
986        // after `build()` returns. This keeps CSpace semantics intact.
987        //
988        // We capture the tool names now and apply them once the agent exists.
989        let cspace_tool_arcs: Vec<Arc<dyn oxi_sdk::AgentTool>> = registry
990            .names()
991            .into_iter()
992            .filter_map(|name| registry.get(&name))
993            .collect();
994
995        // Engine-level observability/security → AgentBuilder (new API).
996        if let Some(auth) = engine.authorizer() {
997            builder = builder.authorizer(auth.clone());
998        }
999        if let Some(tracer) = engine.tracer() {
1000            builder = builder.tracer(tracer.clone());
1001        }
1002        if let Some(ct) = engine.cost_tracker() {
1003            builder = builder.cost_tracker(ct.clone());
1004        }
1005
1006        // Middleware: AgentBuilder convenience helpers replace the manual
1007        // `MiddlewarePipeline` + `build_hooks()` + `set_hooks()` triple.
1008        if config.rate_limit_per_minute > 0 {
1009            builder = builder.with_rate_limit(config.rate_limit_per_minute);
1010        }
1011        if config.token_budget > 0 {
1012            builder = builder.with_token_budget(config.token_budget);
1013        }
1014        if config.audit_tool_calls {
1015            builder = builder.with_logging();
1016        }
1017
1018        let built = builder.build()?;
1019        let agent = Arc::new(built);
1020
1021        // Attach CSpace tools to the agent's tool registry.
1022        // `Agent::tools()` returns the same `Arc<ToolRegistry>` that
1023        // `AgentBuilder` populated, so `register_arc` is the canonical
1024        // extension point for `Arc<dyn AgentTool>` values.
1025        let agent_tools = agent.tools();
1026        for tool in cspace_tool_arcs {
1027            agent_tools.register_arc(tool);
1028        }
1029
1030        agent
1031    };
1032
1033    // Shared mutable state for the event callback.
1034    let exec_state = Arc::new(Mutex::new(ExecuteState::default()));
1035    let exec_state_cb = Arc::clone(&exec_state);
1036    let memory_for_callback: Arc<MemoryManager> = (*kernel_handle.agents.memory_manager()).clone();
1037    let session_id_for_callback = seed_id.to_string();
1038    let model_id_for_callback = config.model_id.clone();
1039    let agent_id_for_callback = agent_id.to_string();
1040    let routing_stats_for_cb = routing_stats.clone();
1041    // RFC-015: real-time event publishing for chat transparency.
1042    // Falls back to None when the caller did not opt in.
1043    let transparency_session: Option<String> = session_id.clone();
1044    let kernel_handle_for_cb: Arc<KernelHandle> = Arc::clone(&kernel_handle);
1045
1046    // Run the agent with streaming events.
1047    let result = agent
1048        .run_streaming(prompt, move |event| {
1049            let mut s = exec_state_cb.lock();
1050            match event {
1051                AgentEvent::ToolExecutionStart {
1052                    tool_name,
1053                    tool_call_id,
1054                    args,
1055                    context,
1056                    ..
1057                } => {
1058                    // Record start time and push a placeholder step.
1059                    let idx = s.trajectory_steps.len();
1060                    s.pending_tools
1061                        .insert(tool_call_id.clone(), (std::time::Instant::now(), idx));
1062                    s.tool_args_map.insert(
1063                        tool_call_id.clone(),
1064                        serde_json::to_string(&args).unwrap_or_default(),
1065                    );
1066                    s.tool_timestamps
1067                        .insert(tool_call_id.clone(), chrono::Utc::now());
1068                    s.tool_call_ids.push(tool_call_id.clone());
1069                    s.trajectory_steps
1070                        .push(oxios_memory::memory::sona::TrajectoryStep {
1071                            input: tool_name.clone(),
1072                            output: String::new(),
1073                            duration_ms: 0,
1074                            confidence: 0.0,
1075                        });
1076                    // RFC-015: broadcast tool start so Web UI can show progress.
1077                    if let Some(ref sid) = transparency_session {
1078                        let context_json = context
1079                            .as_ref()
1080                            .map(serde_json::to_value)
1081                            .transpose()
1082                            .unwrap_or(None);
1083                        let _ =
1084                            kernel_handle_for_cb
1085                                .infra
1086                                .publish(KernelEvent::ToolExecutionStarted {
1087                                    session_id: sid.clone(),
1088                                    tool_name: tool_name.clone(),
1089                                    tool_call_id: tool_call_id.clone(),
1090                                    tool_args: args.clone(),
1091                                    context: context_json,
1092                                });
1093                    }
1094                }
1095                AgentEvent::ToolExecutionUpdate {
1096                    tool_call_id,
1097                    tool_name,
1098                    partial_result,
1099                    tab_id,
1100                    context,
1101                } => {
1102                    // RFC-015: forward real-time progress to the event bus
1103                    // so the Web UI can show a spinner and progress text
1104                    // while the tool is still executing. Best-effort —
1105                    // publish failures (e.g. lagged subscribers) are ignored.
1106                    //
1107                    // `tab_id` and `context` come from oxi-agent 0.29+
1108                    // (ToolCallContext: PageVisit, WebSearch, etc.).
1109                    // Older agent versions won't send these — they default
1110                    // to None and the UI gracefully ignores them.
1111                    if let Some(ref sid) = transparency_session {
1112                        let context_json = context
1113                            .as_ref()
1114                            .map(serde_json::to_value)
1115                            .transpose()
1116                            .unwrap_or(None);
1117                        let _ = kernel_handle_for_cb.infra.publish(
1118                            KernelEvent::ToolExecutionProgress {
1119                                session_id: sid.clone(),
1120                                tool_call_id: tool_call_id.clone(),
1121                                tool_name: tool_name.clone(),
1122                                progress: partial_result,
1123                                tab_id,
1124                                context: context_json,
1125                            },
1126                        );
1127                    }
1128                }
1129                AgentEvent::ToolExecutionEnd {
1130                    tool_name,
1131                    tool_call_id,
1132                    is_error,
1133                    result,
1134                    ..
1135                } => {
1136                    if !is_error {
1137                        s.steps_completed += 1;
1138                    }
1139                    // Look up the exact step by tool_call_id.
1140                    let mut duration_ms: u64 = 0;
1141                    let mut summary = String::new();
1142                    if let Some((start, idx)) = s.pending_tools.remove(tool_call_id.as_str()) {
1143                        duration_ms = start.elapsed().as_millis() as u64;
1144                        if let Some(step) = s.trajectory_steps.get_mut(idx) {
1145                            summary = summarize_tool_result(&result.content, 200);
1146                            step.output = summary.clone();
1147                            step.duration_ms = duration_ms;
1148                            step.confidence = if is_error { 0.3 } else { 0.8 };
1149                        }
1150                    }
1151                    s.tool_error_map.insert(tool_call_id.clone(), is_error);
1152                    // RFC-015: broadcast tool completion.
1153                    if let Some(ref sid) = transparency_session {
1154                        let _ = kernel_handle_for_cb.infra.publish(
1155                            KernelEvent::ToolExecutionFinished {
1156                                session_id: sid.clone(),
1157                                tool_call_id: tool_call_id.clone(),
1158                                tool_name: tool_name.clone(),
1159                                duration_ms,
1160                                is_error,
1161                                output_summary: summary,
1162                            },
1163                        );
1164                    }
1165                }
1166                AgentEvent::AgentEnd {
1167                    messages,
1168                    stop_reason,
1169                    ..
1170                } => {
1171                    if let Some(oxi_sdk::Message::Assistant(a)) = messages.last() {
1172                        s.final_content = a.text_content();
1173                    }
1174                    // oxi 0.32.0: loop exits naturally when LLM produces text-only
1175                    // response (StopReason::Stop). Error/Aborted = failure.
1176                    // ToolUse should not occur at AgentEnd in 0.32.0 (the loop
1177                    // continues until text-only), but treat it as non-failure
1178                    // since tool calls were executed successfully.
1179                    s.success = matches!(stop_reason.as_deref(), Some("Stop") | Some("ToolUse"));
1180                }
1181                AgentEvent::Error { message, .. } => {
1182                    s.final_content = message.clone();
1183                    s.success = false;
1184                }
1185                AgentEvent::Usage {
1186                    input_tokens,
1187                    output_tokens,
1188                } => {
1189                    // Accumulate totals for ExecutionResult.
1190                    s.total_input_tokens += input_tokens as u64;
1191                    s.total_output_tokens += output_tokens as u64;
1192
1193                    // Record token usage to cost tracker (existing).
1194                    let agent_label = format!("agent-{agent_id_for_callback}");
1195                    crate::observability::cost_tracker().record(
1196                        &agent_label,
1197                        &oxi_sdk::Model::new(
1198                            &model_id_for_callback,
1199                            &model_id_for_callback,
1200                            oxi_sdk::Api::OpenAiCompletions,
1201                            "unknown",
1202                            "https://unknown.com",
1203                        ),
1204                        oxi_sdk::TokenUsage {
1205                            input: input_tokens as u64,
1206                            output: output_tokens as u64,
1207                            cache_read: 0,
1208                            cache_write: 0,
1209                        },
1210                    );
1211
1212                    // Record to routing stats (RFC-011).
1213                    if let Some(stats) = &routing_stats_for_cb {
1214                        let cost = crate::kernel_handle::engine_api::estimate_cost(
1215                            &model_id_for_callback,
1216                            input_tokens as u64,
1217                            output_tokens as u64,
1218                        );
1219                        stats.record_model_usage(&model_id_for_callback, cost);
1220                    }
1221                    // RFC-015: publish cumulative token usage.
1222                    if let Some(ref sid) = transparency_session {
1223                        let _ = kernel_handle_for_cb
1224                            .infra
1225                            .publish(KernelEvent::TokenUsageUpdate {
1226                                session_id: sid.clone(),
1227                                input_tokens: input_tokens as u64,
1228                                output_tokens: output_tokens as u64,
1229                            });
1230                    }
1231                }
1232                AgentEvent::Compaction {
1233                    event: CompactionEvent::Completed { result, .. },
1234                } => {
1235                    handle_compaction(
1236                        result.summary.clone(),
1237                        session_id_for_callback.clone(),
1238                        memory_for_callback.clone(),
1239                    );
1240                    // RFC-015: compaction is a form of reasoning — expose it.
1241                    if let Some(ref sid) = transparency_session {
1242                        let _ =
1243                            kernel_handle_for_cb
1244                                .infra
1245                                .publish(KernelEvent::ReasoningFragment {
1246                                    session_id: sid.clone(),
1247                                    content: result.summary.clone(),
1248                                    source: "compaction".to_string(),
1249                                });
1250                    }
1251                }
1252                _ => {}
1253            }
1254        })
1255        .await;
1256
1257    // Record circuit breaker result after agent execution.
1258    let circuit = get_llm_circuit_breaker();
1259    if result.is_err() {
1260        circuit.record_failure();
1261        crate::metrics::get_metrics()
1262            .llm_circuit_breaker_state
1263            .set(1.0);
1264    } else {
1265        circuit.record_success();
1266        crate::metrics::get_metrics()
1267            .llm_circuit_breaker_state
1268            .set(0.0);
1269    }
1270
1271    if let Err(e) = result {
1272        tracing::error!(seed_id = %seed_id, error = %e, "Agent failed");
1273        let s = exec_state.lock();
1274        return Ok((
1275            format!("Agent failed: {e}"),
1276            s.steps_completed,
1277            false,
1278            s.trajectory_steps.clone(),
1279            agent,
1280            s.tool_call_ids.clone(),
1281            s.tool_args_map.clone(),
1282            s.tool_error_map.clone(),
1283            s.tool_timestamps.clone(),
1284            s.total_input_tokens,
1285            s.total_output_tokens,
1286        ));
1287    }
1288
1289    let s = exec_state.lock();
1290    tracing::info!(
1291        seed_id = %seed_id,
1292        steps = s.steps_completed,
1293        success = s.success,
1294        "Agent completed"
1295    );
1296
1297    // Record trajectory to SONA learning engine (RFC-020 Phase 2).
1298    // Fire-and-forget: don't block the result on learning.
1299    if !s.trajectory_steps.is_empty()
1300        && let Some(sona) = kernel_handle.agents.memory_manager().sona_engine()
1301    {
1302        let steps = s.trajectory_steps.clone();
1303        let success = s.success;
1304        let sona = Arc::clone(sona);
1305        let domain = infer_domain(&seed_goal);
1306        tokio::spawn(async move {
1307            let verdict = if success {
1308                oxios_memory::memory::sona::Verdict::Success
1309            } else {
1310                oxios_memory::memory::sona::Verdict::Failure
1311            };
1312            let trajectory = oxios_memory::memory::sona::Trajectory::new(steps, verdict, &domain);
1313            if let Err(e) = sona.record(trajectory).await {
1314                tracing::debug!(error = %e, "SONA trajectory recording failed (non-fatal)");
1315            }
1316        });
1317    }
1318
1319    Ok((
1320        s.final_content.clone(),
1321        s.steps_completed,
1322        s.success,
1323        s.trajectory_steps.clone(),
1324        agent,
1325        s.tool_call_ids.clone(),
1326        s.tool_args_map.clone(),
1327        s.tool_error_map.clone(),
1328        s.tool_timestamps.clone(),
1329        s.total_input_tokens,
1330        s.total_output_tokens,
1331    ))
1332}
1333
/// Summarize a tool result string to fit within `max_len` characters.
///
/// The input is trimmed first. If the trimmed text fits, it is returned
/// whole. Otherwise the first line is returned if it fits; failing that, the
/// first line is cut and suffixed with `...` so the total is exactly
/// `max_len` characters. When `max_len` leaves no room for the ellipsis
/// (`max_len <= 3`), the bare prefix of `max_len` characters is returned so
/// the limit is never exceeded.
///
/// Uses char-aware truncation to avoid panicking on multi-byte UTF-8
/// (e.g., Korean, CJK, emoji).
fn summarize_tool_result(result: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    /// True when `s` holds more than `limit` chars. Stops scanning after
    /// `limit + 1` chars instead of counting the whole (possibly huge) text.
    fn exceeds(s: &str, limit: usize) -> bool {
        s.chars().nth(limit).is_some()
    }

    let trimmed = result.trim();
    if !exceeds(trimmed, max_len) {
        return trimmed.to_string();
    }

    // Prefer the first line when it fits on its own.
    let first_line = trimmed.lines().next().unwrap_or("");
    if !exceeds(first_line, max_len) {
        return first_line.to_string();
    }

    let keep = max_len.saturating_sub(ELLIPSIS.len());
    if keep == 0 {
        // No room for an ellipsis: return as many chars as fit, unsuffixed.
        return first_line.chars().take(max_len).collect();
    }
    let mut summary: String = first_line.chars().take(keep).collect();
    summary.push_str(ELLIPSIS);
    summary
}
/// Truncate a JSON string to at most `max_len` characters.
///
/// Strings that already fit are returned unchanged. Longer strings are cut
/// and suffixed with `...` so the result is exactly `max_len` characters;
/// when `max_len <= 3` there is no room for the ellipsis and the bare prefix
/// is returned instead.
///
/// The limit is measured in characters, not bytes, so multi-byte text
/// (Korean, CJK, emoji) that fits is not truncated.
fn truncate_json_str(json_str: &str, max_len: usize) -> String {
    // Byte length is an upper bound on the char count, so the first test is
    // a cheap fast path; the second is the exact, bounded char-aware check.
    if json_str.len() <= max_len || json_str.chars().nth(max_len).is_none() {
        return json_str.to_string();
    }
    // Saturating sub avoids underflow panic when max_len < 3; if there
    // isn't room for an ellipsis, return as many chars as fit.
    let take = max_len.saturating_sub(3);
    if take == 0 {
        return json_str.chars().take(max_len).collect();
    }
    let truncated: String = json_str.chars().take(take).collect();
    format!("{truncated}...")
}
1370
/// Infer a domain category from a seed goal for SONA trajectory grouping.
///
/// The goal is lower-cased and split on whitespace; punctuation attached to
/// a word (`"fix:"`, `"deploy."`) is stripped before matching. The first
/// eight words are checked against the keyword table below, and the first
/// category (in table order) with a matching keyword wins.
///
/// If no category matches, the first two words longer than two bytes are
/// joined with `_`; if fewer than two such words exist, `"general"` is
/// returned.
fn infer_domain(goal: &str) -> String {
    /// Category label → trigger keywords. Order is significant: earlier
    /// rows take precedence when a goal mentions keywords from several.
    const DOMAIN_KEYWORDS: &[(&str, &[&str])] = &[
        (
            "testing",
            &["test", "tests", "spec", "testing", "assert", "integration"],
        ),
        ("deployment", &["deploy", "release", "publish", "ship"]),
        ("bugfix", &["fix", "bug", "patch", "repair", "debug"]),
        (
            "refactoring",
            &["refactor", "restructure", "reorganize", "rewrite"],
        ),
        (
            "documentation",
            &["doc", "document", "readme", "guide", "explain"],
        ),
        (
            "development",
            &["build", "create", "implement", "add", "make", "new"],
        ),
        (
            "analysis",
            &["analyze", "review", "audit", "inspect", "check"],
        ),
        (
            "configuration",
            &["config", "setup", "install", "configure", "init"],
        ),
    ];

    /// Drop leading/trailing non-alphanumeric characters from a word.
    fn strip_punctuation(word: &str) -> &str {
        word.trim_matches(|c: char| !c.is_alphanumeric())
    }

    let lower = goal.to_lowercase();

    // Only the head of the goal is inspected for domain indicators.
    let head: Vec<&str> = lower
        .split_whitespace()
        .take(8)
        .map(strip_punctuation)
        .collect();

    let matched = DOMAIN_KEYWORDS
        .iter()
        .find(|(_, keywords)| head.iter().any(|word| keywords.contains(word)));
    if let Some((label, _)) = matched {
        return (*label).to_string();
    }

    // Fallback: first 2 meaningful words
    let meaningful: Vec<&str> = lower
        .split_whitespace()
        .map(strip_punctuation)
        .filter(|w| w.len() > 2)
        .take(2)
        .collect();
    if meaningful.len() >= 2 {
        meaningful.join("_")
    } else {
        "general".to_string()
    }
}
1449
1450/// Handle compaction completion by storing the summary as a Warm memory.
1451///
1452/// Extracts the compaction summary from the event and spawns a background
1453/// task to persist it via MemoryManager. This replaces the inline 30-line
1454/// block that was previously in the event callback.
1455fn handle_compaction(summary: String, session_id: String, memory_manager: Arc<MemoryManager>) {
1456    let entry = MemoryEntry {
1457        id: uuid::Uuid::new_v4().to_string(),
1458        memory_type: MemoryType::Conversation,
1459        tier: crate::memory::MemoryTier::Warm,
1460        content: summary,
1461        content_hash: 0,
1462        source: "compaction".to_string(),
1463        session_id: Some(session_id),
1464        tags: vec![],
1465        importance: 0.5,
1466        pinned: false,
1467        protection: crate::memory::ProtectionLevel::None,
1468        auto_classified: false,
1469        session_appearances: 0,
1470        user_corrected: false,
1471        seen_in_sessions: vec![],
1472        created_at: chrono::Utc::now(),
1473        accessed_at: chrono::Utc::now(),
1474        modified_at: chrono::Utc::now(),
1475        access_count: 0,
1476        decay_score: 1.0,
1477        compaction_level: 0,
1478        compacted_from: vec![],
1479        related_ids: vec![],
1480        contradicts: None,
1481    };
1482    tokio::spawn(async move {
1483        if let Err(e) = memory_manager.remember(entry).await {
1484            tracing::warn!(error = %e, "Failed to save compaction summary");
1485        }
1486    });
1487}
1488
1489/// Build a system prompt from the Seed's goal, constraints, persona,
1490/// and optionally a capability index and kernel manifest.
1491#[allow(dead_code)]
1492fn build_system_prompt(
1493    seed: &Seed,
1494    persona_prompt: Option<&str>,
1495    capabilities_xml: Option<&str>,
1496    kernel_manifest: Option<&str>,
1497    workspace_context: Option<&str>,
1498) -> String {
1499    build_system_prompt_inner(
1500        &seed.goal,
1501        &seed.original_request,
1502        &seed.constraints,
1503        &seed.acceptance_criteria,
1504        &seed.ontology,
1505        workspace_context,
1506        persona_prompt,
1507        capabilities_xml,
1508        kernel_manifest,
1509    )
1510}
1511
1512/// Build a system prompt from a Directive and ExecEnv (RFC-027).
1513///
1514/// Maps [`Directive`] fields (`goal`, `original_request`, `constraints`,
1515/// `acceptance_criteria`) and [`ExecEnv`] fields (`workspace_context`) into
1516#[allow(dead_code)]
1517fn build_directive_system_prompt(
1518    directive: &Directive,
1519    env: &ExecEnv,
1520    persona_prompt: Option<&str>,
1521    capabilities_xml: Option<&str>,
1522    kernel_manifest: Option<&str>,
1523) -> String {
1524    let ontology: &[Entity] = &[];
1525    build_system_prompt_inner(
1526        &directive.goal,
1527        &directive.original_request,
1528        &directive.constraints,
1529        &directive.acceptance_criteria,
1530        ontology,
1531        env.workspace_context.as_deref(),
1532        persona_prompt,
1533        capabilities_xml,
1534        kernel_manifest,
1535    )
1536}
1537
1538/// Shared system-prompt builder for Seed and Directive paths.
1539///
1540/// Composes the static agent prelude, goal/constraints/criteria sections,
1541/// optional workspace context and ontology, persona, capability index, and
1542/// kernel manifest into a single prompt string. The ontology section is
1543/// Seed-only; Directive callers pass an empty slice.
1544#[allow(clippy::too_many_arguments)]
1545fn build_system_prompt_inner(
1546    goal: &str,
1547    original_request: &str,
1548    constraints: &[String],
1549    acceptance_criteria: &[String],
1550    ontology: &[Entity],
1551    workspace_context: Option<&str>,
1552    persona_prompt: Option<&str>,
1553    capabilities_xml: Option<&str>,
1554    kernel_manifest: Option<&str>,
1555) -> String {
1556    let mut prompt = String::from(
1557        "You are an autonomous agent in the Oxios operating system.\n\
1558         You execute Seeds — immutable specifications with goals, constraints, and\n\
1559         acceptance criteria.\n\n\
1560         ## Available Tools\n\
1561         You have the following tools:\n\
1562         - **File tools**: read, write, edit files; grep, find, ls for searching\n\
1563         - **Web tools**: web_search for searching the web, get_search_results for retrieving cached results\n\
1564         - **Exec**: run shell commands\n\
1565         - **Memory tools**: memory_read, memory_write, memory_search — agent's internal recall\n\
1566         - **Knowledge**: knowledge — personal markdown vault for documents and notes\n\
1567         - **Kernel tools**: agent, project, persona, cron, security, budget, resource\n\n\
1568         **Important**: When the task involves fetching information from the internet,\n\
1569         websites, or online services, use `web_search` first — do NOT search local files.\n\
1570         When the task asks to \"get\", \"fetch\", \"find online\", or \"look up\" something\n\
1571         from the web, use `web_search`.\n",
1572    );
1573    prompt.push_str(&format!("\n## Goal\n{}\n", goal));
1574
1575    // Preserve user's original wording so the agent sees exact language,
1576    // filenames, and nuances that may have been abstracted in the goal.
1577    if !original_request.is_empty() && original_request != goal {
1578        prompt.push_str(&format!(
1579            "\n## User's Original Request\n{}\n",
1580            original_request
1581        ));
1582    }
1583
1584    if !constraints.is_empty() {
1585        prompt.push_str("\n## Constraints\n");
1586        for (i, c) in constraints.iter().enumerate() {
1587            prompt.push_str(&format!("{}. {}\n", i + 1, c));
1588        }
1589    }
1590
1591    if !acceptance_criteria.is_empty() {
1592        prompt.push_str("\n## Acceptance Criteria\n");
1593        for (i, c) in acceptance_criteria.iter().enumerate() {
1594            prompt.push_str(&format!("{}. {}\n", i + 1, c));
1595        }
1596    }
1597
1598    // ── Workspace Context (RFC-025) ──
1599    // Inject active Mounts + project instructions AFTER the goal/constraints
1600    // and BEFORE the persona, so the agent sees its workspace before it acts.
1601    if let Some(ctx) = workspace_context.filter(|s| !s.trim().is_empty()) {
1602        prompt.push_str("\n## Workspace Context\n");
1603        prompt.push_str(ctx);
1604        prompt.push('\n');
1605    }
1606
1607    if !ontology.is_empty() {
1608        prompt.push_str("\n## Domain Entities\n");
1609        for e in ontology {
1610            prompt.push_str(&format!(
1611                "- **{}** ({}): {}\n",
1612                e.name, e.entity_type, e.description
1613            ));
1614        }
1615    }
1616
1617    // Inject persona system prompt
1618    if let Some(pp) = persona_prompt {
1619        prompt.push_str("\n## Persona\n");
1620        prompt.push_str(pp);
1621        prompt.push('\n');
1622    }
1623
1624    // Inject semantic capability index (from ToolRetriever)
1625    if let Some(xml) = capabilities_xml {
1626        prompt.push_str("\n## Available Capabilities\n");
1627        prompt.push_str("The following capabilities are relevant to your goal. ");
1628        prompt.push_str("Use the `read` tool to load SKILL.md for any program.\n\n");
1629        prompt.push_str(xml);
1630        prompt.push('\n');
1631    }
1632
1633    // Inject kernel manifest (from CSpace)
1634    if let Some(manifest) = kernel_manifest {
1635        prompt.push('\n');
1636        prompt.push_str(manifest);
1637        prompt.push('\n');
1638    }
1639
1640    // Execution environment guidance
1641    prompt.push_str(
1642        "\n## Execution Protocol\n\
1643         1. UNDERSTAND — Read the Seed completely before acting.\n\
1644         2. PLAN — Determine the minimal set of actions needed.\n\
1645         3. EXECUTE — Use tools to accomplish the goal. Prefer the simplest approach.\n\
1646         4. VERIFY — After each action, check the result: created a file? read it back.\n\
1647         5. REPORT — Summarize how each acceptance criterion was met, with evidence.\n\n\
1648         ## Hard Boundaries\n\
1649         - NEVER modify files outside the workspace scope\n\
1650         - NEVER execute destructive commands without confirming scope\n\
1651         - NEVER claim completion without evidence — show the output, not your opinion\n\
1652         - NEVER add features or improvements beyond the Seed scope\n\
1653         - If you cannot complete the Seed, say so and explain WHY\n\n\
1654         ## Scope Guard\n\
1655         The Seed defines your universe. Do not:\n\
1656         - Refactor code the Seed didn't mention\n\
1657         - Add tests the Seed didn't require\n\
1658         - Change configuration the Seed didn't specify\n\
1659         - \"Improve\" anything beyond what the acceptance criteria demand\n\n\
1660         ## Error Handling\n\
1661         - If a tool fails, read the error message carefully before retrying\n\
1662         - If a command fails, do NOT immediately retry with --force or sudo\n\
1663         - If stuck after 3 attempts, report the blocker rather than continuing to fail\n\n\
1664         ## Shape Matching\n\
1665         Match your output to the task: simple task → concise response.\n\
1666         Do not write 50 lines when 5 would do.\n\
1667         Use `exec` for all command execution (git, gh, osascript, etc.).",
1668    );
1669
1670    prompt
1671}
1672#[allow(dead_code)]
1673fn build_user_prompt(seed: &Seed) -> String {
1674    build_user_prompt_inner(&seed.goal, &seed.acceptance_criteria)
1675}
1676#[allow(dead_code)]
1677fn build_directive_user_prompt(directive: &Directive) -> String {
1678    build_user_prompt_inner(&directive.goal, &directive.acceptance_criteria)
1679}
1680
/// Shared user-prompt builder for Seed and Directive paths.
///
/// Produces the goal followed by an `Acceptance criteria:` heading and the
/// criteria as a list numbered from 1, one per line. There is no newline
/// after the last criterion; with no criteria the text ends right after the
/// heading's newline.
fn build_user_prompt_inner(goal: &str, acceptance_criteria: &[String]) -> String {
    let mut message = String::from("Execute the following goal:\n\n");
    message.push_str(goal);
    message.push_str("\n\nAcceptance criteria:\n");

    for (index, criterion) in acceptance_criteria.iter().enumerate() {
        // Newlines separate entries rather than terminate them.
        if index > 0 {
            message.push('\n');
        }
        message.push_str(&(index + 1).to_string());
        message.push_str(". ");
        message.push_str(criterion);
    }

    message
}
1694
1695impl std::fmt::Debug for AgentRuntime {
1696    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1697        f.debug_struct("AgentRuntime")
1698            .field("model_id", &self.engine_handle.get().default_model_id())
1699            .finish()
1700    }
1701}
1702
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use oxi_sdk::{AgentTool, ToolContext, ToolError};
    use oxios_ouroboros::Entity;
    use serde_json::Value;

    /// Inert tool whose only purpose is to occupy a slot in a `ToolRegistry`.
    struct DummyTool {
        name: String,
    }

    impl DummyTool {
        /// Shorthand for building a dummy tool with the given name.
        fn named(name: &str) -> Self {
            Self { name: name.into() }
        }
    }

    #[async_trait]
    impl AgentTool for DummyTool {
        fn name(&self) -> &str {
            &self.name
        }
        fn label(&self) -> &str {
            &self.name
        }
        fn description(&self) -> &str {
            "Test tool"
        }
        fn parameters_schema(&self) -> Value {
            serde_json::json!({"type": "object"})
        }

        async fn execute(
            &self,
            _tool_call_id: &str,
            _params: Value,
            _shutdown: Option<tokio::sync::oneshot::Receiver<()>>,
            _ctx: &ToolContext,
        ) -> Result<oxi_sdk::AgentToolResult, ToolError> {
            Ok(oxi_sdk::AgentToolResult::success("ok"))
        }
    }

    /// Seed with the given goal and every other field empty or defaulted.
    /// Tests override individual fields with struct-update syntax.
    fn bare_seed(goal: &str) -> Seed {
        Seed {
            id: uuid::Uuid::new_v4(),
            goal: goal.into(),
            constraints: vec![],
            acceptance_criteria: vec![],
            ontology: vec![],
            created_at: chrono::Utc::now(),
            generation: 0,
            parent_seed_id: None,
            cspace_hint: None,
            original_request: String::new(),
            output_schema: None,
            project_id: None,
            workspace_context: None,
            mount_paths: Vec::new(),
        }
    }

    /// requires_tools validation reports nothing when every tool is registered.
    #[test]
    fn test_requires_tools_validation_passes() {
        let registry = ToolRegistry::new();
        registry.register(DummyTool::named("read"));
        registry.register(DummyTool::named("exec"));

        let missing = registry.missing(&["read", "exec"]);

        assert!(
            missing.is_empty(),
            "Expected no missing tools, got: {:?}",
            missing
        );
    }

    /// requires_tools validation lists each unregistered tool, in request order.
    #[test]
    fn test_requires_tools_validation_fails() {
        let registry = ToolRegistry::new();
        registry.register(DummyTool::named("read"));

        let missing = registry.missing(&["read", "exec", "nonexistent"]);

        assert_eq!(missing, vec!["exec", "nonexistent"]);
    }

    /// Goal, constraints, criteria and ontology all surface in the prompt.
    #[test]
    fn test_build_system_prompt_includes_goal() {
        let seed = Seed {
            constraints: vec!["Must use Rust".into()],
            acceptance_criteria: vec!["Server responds to requests".into()],
            ontology: vec![Entity {
                name: "HttpServer".into(),
                entity_type: "struct".into(),
                description: "The main server struct".into(),
            }],
            ..bare_seed("Build a web server")
        };

        let prompt = build_system_prompt(&seed, None, None, None, None);

        for expected in [
            "Build a web server",
            "Must use Rust",
            "Server responds to requests",
            "HttpServer",
            "struct",
        ] {
            assert!(prompt.contains(expected));
        }
    }

    /// A Seed with only a goal still yields a prompt containing that goal.
    #[test]
    fn test_build_system_prompt_empty() {
        let seed = bare_seed("Test goal");

        let prompt = build_system_prompt(&seed, None, None, None, None);

        assert!(prompt.contains("Test goal"));
    }

    #[test]
    fn test_infer_domain_testing() {
        let domain = infer_domain("run all unit tests for the kernel");
        assert_eq!(domain, "testing");
    }

    #[test]
    fn test_infer_domain_deployment() {
        let domain = infer_domain("deploy the web service to production");
        assert_eq!(domain, "deployment");
    }

    #[test]
    fn test_infer_domain_bugfix() {
        let domain = infer_domain("fix the null pointer error in main");
        assert_eq!(domain, "bugfix");
    }

    #[test]
    fn test_infer_domain_development() {
        let domain = infer_domain("create a new REST API endpoint");
        assert_eq!(domain, "development");
    }

    #[test]
    fn test_infer_domain_analysis() {
        let domain = infer_domain("review the code for security issues");
        assert_eq!(domain, "analysis");
    }

    #[test]
    fn test_infer_domain_fallback() {
        let domain = infer_domain("optimize performance metrics");
        // Should fall back to first 2 meaningful words
        assert!(!domain.is_empty());
    }
}
oxios_kernel/agent_runtime.rs

oxios_kernel/
agent_runtime.rs