Skip to main content

runtimo_core/
lib.rs

1//! Runtimo Core — Agent-centric capability runtime.
2//!
3//! Runtimo provides structured execution, resource limits, crash recovery,
4//! and two-layer telemetry (hardware + process tracking) for machines that
5//! cannot be factory-reset. Every capability execution captures before/after
6//! snapshots, with full audit trails and undo support.
7//!
8//! # Architecture
9//!
10//! - **Capabilities** — Pluggable operations implementing the [`Capability`] trait
//! - **Jobs** — Lifecycle-tracked execution units ([`Job`], [`JobState`])
12//! - **Telemetry** — Hardware awareness ([`Telemetry`])
13//! - **Process Snapshot** — Running process awareness ([`ProcessSnapshot`])
14//! - **WAL** — Append-only crash recovery log ([`WalWriter`]/[`WalReader`])
15//! - **Backup** — Undo support via pre-mutation file backups ([`BackupManager`])
16//! - **Resource Guards** — Circuit breaker via [`LlmoSafeGuard`]
17//!
18//! # Quick Start
19//!
20//! ```rust
21//! use runtimo_core::{FileRead, Capability, Context};
22//! use serde_json::json;
23//!
24//! let cap = FileRead;
25//! assert_eq!(cap.name(), "FileRead");
26//! ```
27//!
28//! # Execution with Full Telemetry
29//!
30//! ```rust,ignore
31//! use runtimo_core::{FileRead, execute_with_telemetry};
32//! use serde_json::json;
33//! use std::path::Path;
34//!
35//! let cap = FileRead;
36//! let result = execute_with_telemetry(
37//!     &cap,
38//!     &json!({"path": "/tmp/test.txt"}),
39//!     false,
40//!     Path::new("/tmp/runtimo.wal"),
41//! ).unwrap();
42//! assert!(result.success);
43//! ```
44//!
45//! # Performance (Measured on AMD EPYC 7B13)
46//!
47//! | Operation | Latency | Notes |
48//! |-----------|---------|-------|
49//! | Cold start | <1s | Binary load + init |
50//! | FileRead | <10ms | Small files (<1KB) |
51//! | FileWrite | <50ms | Includes backup copy |
52//! | Telemetry capture | <100ms | 15+ shell subprocesses |
53//! | Process snapshot | <50ms | ps aux parse |
54//! | Memory baseline | <50MB | RSS at idle |
55//!
56//! # Feature Flags
57//!
58//! No optional features currently. All functionality is included by default.
59
// Relax strict Clippy lints in test builds only: unwrap/expect, direct indexing,
// and results discarded via `.ok()` are idiomatic in test code.
61#![cfg_attr(
62    test,
63    allow(
64        clippy::unwrap_used,
65        clippy::expect_used,
66        clippy::indexing_slicing,
67        clippy::unused_result_ok
68    )
69)]
70
/// Undo support via pre-mutation file backups.
pub mod backup;
/// Pluggable capability implementations (file I/O, shell, git, etc.).
pub mod capabilities;
/// Core trait and registry for pluggable operations.
pub mod capability;
/// Shell command execution helper.
pub mod cmd;
/// Global configuration and path resolution.
pub mod config;
/// Capability executor with telemetry and safety guards.
pub mod executor;
/// Job identity, state machine, and WAL event types.
pub mod job;
/// LLM safety guard — CPU/RAM circuit breakers and entropy source.
pub mod llmosafe;
/// Health monitoring with alerting.
pub mod monitor;
/// Process snapshot, zombie detection, and top-N queries.
pub mod processes;
/// Session tracking for reliable SSH.
pub mod session;
/// System telemetry capture and reporting.
pub mod telemetry;
/// Path validation against allowed-prefix lists.
pub mod validation;
/// Write-ahead log for crash recovery.
pub mod wal;
98
99pub use backup::BackupManager;
100pub use capabilities::{FileRead, FileWrite, GitExec, Kill, ShellExec, Undo};
101pub use capability::{
102    Capability, CapabilityError, CapabilityRegistry, Context, Output, TypedCapability,
103};
104pub use config::RuntimoConfig;
105pub use executor::{execute_with_telemetry, execute_with_telemetry_and_session};
106pub use job::{Job, JobId, JobState};
107pub use llmosafe::LlmoSafeGuard;
108pub use monitor::HealthMonitor;
109pub use processes::ProcessSnapshot;
110pub use telemetry::Telemetry;
111pub use wal::{WalEvent, WalEventType, WalReader, WalWriter};
112
/// Error types for runtimo-core.
///
/// Covers all failure modes: state transitions, schema validation,
/// capability lookup and execution, WAL/backup/session errors, resource
/// limits, telemetry, and cognitive-safety violations.
///
/// Most variants carry a single human-readable `String`; only
/// [`Error::InvalidTransition`] and [`Error::CapabilityExecutionFailed`]
/// carry structured fields.
#[allow(clippy::exhaustive_enums)] // new variants are semver-breaking regardless
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Invalid job state transition attempted.
    ///
    /// `from` is the state the job was in and `to` is the rejected target
    /// state; both are rendered with their `Debug` form in the message.
    #[error("Invalid job state transition: {from:?} -> {to:?}")]
    InvalidTransition { from: JobState, to: JobState },

    /// JSON schema validation failed for capability arguments.
    #[error("Schema validation failed: {0}")]
    SchemaValidationFailed(String),

    /// Requested capability not found in registry.
    #[error("Capability not found: {0}")]
    CapabilityNotFound(String),

    /// Capability execution failed.
    ///
    /// Carries only a message; see [`Error::CapabilityExecutionFailed`] for
    /// the structured counterpart.
    #[error("Execution failed: {0}")]
    ExecutionFailed(String),

    /// Execution failed with a structured capability error.
    ///
    /// This variant preserves the original `CapabilityError` variant information
    /// that would otherwise be lost to stringification in the blanket impl in
    /// `capability.rs`. Clients can match on this variant to programmatically
    /// distinguish `PermissionDenied` from `NotFound`, `InvalidArgs`, etc.
    ///
    /// # Fields
    /// - `msg`: Human-readable error message (for display/logging)
    /// - `variant`: Machine-readable variant name (for programmatic handling)
    /// - `code`: JSON-RPC error code in range -32000 to -32099 (server-defined errors)
    ///
    /// Note that `code` is not part of the `Display` output, which only shows
    /// `variant` and `msg`.
    ///
    /// # Example
    /// ```rust,ignore
    /// match error {
    ///     Error::CapabilityExecutionFailed { code, variant, msg } => {
    ///         eprintln!("Error {}: {} - {}", code, variant, msg);
    ///     }
    ///     _ => {}
    /// }
    /// ```
    #[error("Capability execution failed: {variant} - {msg}")]
    CapabilityExecutionFailed {
        msg: String,
        variant: &'static str,
        code: i32,
    },

    /// Write-Ahead Log operation failed.
    #[error("WAL error: {0}")]
    WalError(String),

    /// Backup/restore operation failed.
    #[error("Backup error: {0}")]
    BackupError(String),

    /// Session operation failed (create, load, save, list).
    #[error("Session error: {0}")]
    SessionError(String),

    /// System resource limit exceeded (CPU, RAM, or zombie count).
    #[error("Resource limit exceeded: {0}")]
    ResourceLimitExceeded(String),

    /// Telemetry capture failed.
    #[error("Telemetry error: {0}")]
    TelemetryError(String),

    /// Cognitive safety violation detected by LLMOSafe.
    #[error("Cognitive safety violation: {0}")]
    CognitiveSafetyViolation(String),
}
188
/// Result alias for runtimo-core operations.
///
/// Shorthand for [`std::result::Result`] with the error type fixed to
/// this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
191
/// Utility functions for path management and ID generation.
pub mod utils {
    use std::path::PathBuf;
    use std::sync::atomic::{AtomicU32, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Reads `key` from the environment, returning it only if it holds an
    /// absolute path.
    ///
    /// Unset, empty, and relative values all yield `None`. The XDG Base
    /// Directory spec requires empty values to be treated as unset and
    /// relative values to be ignored; accepting them would make the data
    /// root depend on the process's current working directory.
    ///
    /// `var_os` is used instead of `var` so that valid non-UTF-8 paths are
    /// honoured rather than silently discarded.
    fn absolute_env_path(key: &str) -> Option<PathBuf> {
        std::env::var_os(key)
            .map(PathBuf::from)
            .filter(|path| path.is_absolute())
    }

    /// Returns the data directory following the XDG spec.
    ///
    /// Resolution order:
    /// 1. `$XDG_DATA_HOME/runtimo` if `XDG_DATA_HOME` is an absolute path.
    /// 2. `$HOME/.local/share/runtimo` if `HOME` is an absolute path.
    /// 3. `/tmp/runtimo`, with a warning on stderr.
    ///
    /// Empty or relative values of either variable are ignored (treated as
    /// unset). Data in `/tmp` is not persistent across reboots — WAL and
    /// backup durability guarantees are degraded in the fallback mode.
    #[must_use]
    pub fn data_dir() -> PathBuf {
        let base = absolute_env_path("XDG_DATA_HOME")
            .or_else(|| absolute_env_path("HOME").map(|home| home.join(".local/share")));
        match base {
            Some(dir) => dir.join("runtimo"),
            None => {
                eprintln!(
                    "[runtimo] Warning: XDG_DATA_HOME and HOME unset or not absolute — \
                     using /tmp/runtimo (data will not survive reboot)"
                );
                PathBuf::from("/tmp/runtimo")
            }
        }
    }

    /// Returns the WAL path (env override or default).
    ///
    /// A non-empty `RUNTIMO_WAL_PATH` is used verbatim; otherwise the path
    /// is `data_dir().join("wal.jsonl")`. An empty override is treated as
    /// unset so it cannot produce an empty path.
    #[must_use]
    pub fn wal_path() -> PathBuf {
        std::env::var_os("RUNTIMO_WAL_PATH")
            .filter(|raw| !raw.is_empty())
            .map_or_else(|| data_dir().join("wal.jsonl"), PathBuf::from)
    }

    /// Returns the backup directory derived from `data_dir()`.
    ///
    /// Always returns `data_dir().join("backups")`. This is a derived path
    /// from the trusted `data_dir` root — no env var override is available
    /// (see ADR-C28). External config of the backup location would create
    /// an attacker control vector.
    #[must_use]
    pub fn backup_dir() -> PathBuf {
        data_dir().join("backups")
    }

    /// Generates a unique ID rendered as exactly 32 lowercase hex chars.
    ///
    /// Uses 16 bytes from `/dev/urandom` for collision resistance —
    /// P(collision) < 10⁻¹⁵ even at 100 IDs/sec for 1 hour.
    ///
    /// If urandom is unavailable (e.g., non-Linux platforms) the ID is built
    /// from the low 64 bits of the nanosecond timestamp, the process ID, and
    /// a per-process counter. The counter keeps IDs distinct within one
    /// process even when two calls observe the same clock reading; the
    /// fallback is unique-ish, not unpredictable.
    #[must_use]
    pub fn generate_id() -> String {
        // Distinguishes fallback IDs generated within the same clock tick.
        static FALLBACK_SEQ: AtomicU32 = AtomicU32::new(0);

        let mut bytes = [0u8; 16];
        let seeded = std::fs::File::open("/dev/urandom")
            .and_then(|mut urandom| std::io::Read::read_exact(&mut urandom, &mut bytes))
            .is_ok();

        let value = if seeded {
            // Big-endian keeps the hex output identical to encoding the
            // bytes one by one in order.
            u128::from_be_bytes(bytes)
        } else {
            let nanos = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_nanos();
            let seq = FALLBACK_SEQ.fetch_add(1, Ordering::Relaxed);
            // Layout: [ nanos (low 64 bits) | pid (32 bits) | seq (32 bits) ].
            // The shift intentionally discards the timestamp's high bits.
            (nanos << 64) | (u128::from(std::process::id()) << 32) | u128::from(seq)
        };
        format!("{value:032x}")
    }
}