// hyperlight_js/sandbox/monitor/mod.rs

1/*
2Copyright 2026  The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16//! Execution monitoring for JavaScript sandbox handlers.
17//!
18//! This module provides the [`ExecutionMonitor`] trait and built-in implementations
19//! for monitoring and terminating handler execution based on resource limits.
20//!
21//! # Architecture — Why two traits?
22//!
23//! The monitoring system has a subtle design tension:
24//!
25//! 1. **Users** want a simple trait to implement: `get_monitor()` + `name()`.
26//! 2. **The orchestrator** needs to race multiple monitors, identifying which
27//!    one fired (by name) for metrics and logging.
28//! 3. **Tuples** of monitors (e.g. `(WallClockMonitor, CpuTimeMonitor)`) are
29//!    a **composition** of monitors, not a single monitor — they shouldn't
30//!    pretend to be one by implementing `ExecutionMonitor`.
31//!
32//! The solution: **separate concerns into two traits**.
33//!
34//! - [`ExecutionMonitor`] — User-facing. Only two methods: `get_monitor()` and
35//!   `name()`. Simple, clean, no composition logic.
36//! - [`MonitorSet`] — Internal (sealed). One method: [`to_race()`](MonitorSet::to_race).
37//!   Produces a single racing future that completes when the first monitor
38//!   fires, emitting metrics and logging the winner. Automatically derived
39//!   for every `ExecutionMonitor` via a blanket impl, and for tuples of up
40//!   to 5 monitors via `tokio::select!` in a macro.
41//!
42//! The orchestrator's `handle_event_with_monitor` is bounded by
43//! `M: MonitorSet`, not `M: ExecutionMonitor`. Users never need to know
44//! `MonitorSet` exists — it's sealed so they can't implement it directly,
45//! and it's derived automatically via the blanket impl.
46//!
47//! # Built-in Monitors
48//!
49//! - [`WallClockMonitor`] - Terminates execution after a wall-clock timeout
50//!   (requires `monitor-wall-clock` feature)
51//! - [`CpuTimeMonitor`] - Terminates execution after a CPU time limit
52//!   (requires `monitor-cpu-time` feature)
53//!
54//! # Usage
55//!
56//! ```text
57//! use hyperlight_js::{WallClockMonitor, CpuTimeMonitor, ExecutionMonitor};
58//! use std::time::Duration;
59//!
60//! // Single monitor — ExecutionMonitor auto-satisfies MonitorSet via blanket impl
61//! let monitor = WallClockMonitor::new(Duration::from_secs(5))?;
62//! let result = loaded_sandbox.handle_event_with_monitor(
63//!     "handler",
64//!     "{}".to_string(),
65//!     &monitor,
66//!     None,
67//! )?;
68//!
69//! // Multiple monitors — tuples implement MonitorSet with OR semantics.
70//! // The first monitor to trigger terminates execution, and the winning
71//! // monitor's name is logged so you know exactly which limit was breached.
72//! let wall = WallClockMonitor::new(Duration::from_secs(5))?;
73//! let cpu = CpuTimeMonitor::new(Duration::from_millis(500))?;
74//! let result = loaded_sandbox.handle_event_with_monitor(
75//!     "handler",
76//!     "{}".to_string(),
77//!     &(wall, cpu),
78//!     None,
79//! )?;
80//! ```
81//!
82//! # Custom Monitors
83//!
84//! Implement [`ExecutionMonitor`] to create custom monitoring logic:
85//!
86//! ```text
87//! use hyperlight_js::ExecutionMonitor;
88//! use hyperlight_host::Result;
89//! use std::future::Future;
90//!
91//! struct MyMonitor { limit: std::time::Duration }
92//!
93//! impl ExecutionMonitor for MyMonitor {
94//!     fn get_monitor(&self) -> Result<impl Future<Output = ()> + Send + 'static> {
95//!         let limit = self.limit;
96//!         Ok(async move {
97//!             hyperlight_js::monitor::sleep(limit).await;
98//!             tracing::warn!("Custom limit exceeded");
99//!         })
100//!     }
101//!
102//!     fn name(&self) -> &'static str { "my-monitor" }
103//! }
104//! ```
105//!
106//! # Fail-Closed Semantics
107//!
108//! If any monitor fails to initialize (`get_monitor()` returns `Err`), the handler
109//! is **never executed**. Execution cannot proceed unmonitored due to a monitor
110//! initialization failure. This is a deliberate security-first design choice.
111//!
112//! # Using Wall-Clock and CPU Monitors Together
113//!
114//! Wall-clock and CPU monitors are designed to be used together as a tuple
115//! `(WallClockMonitor, CpuTimeMonitor)` to provide comprehensive protection:
116//!
117//! - **`CpuTimeMonitor`** catches compute-bound abuse (crypto mining, tight loops)
118//! - **`WallClockMonitor`** catches resource exhaustion where the guest consumes
119//!   **host resources without consuming CPU** — e.g. blocking on host calls. A guest doing
120//!   "nothing" in terms of CPU can still starve the host of resources (sometimes
//!   called a **resource exhaustion attack** or **slowloris-style denial of service**).
122//!   Right now this is not really possible to do in Hyperlight since there is no way for
123//!   the guest to block without consuming CPU, but we want to be prepared for when this is possible.
124//!
125//! Neither alone is sufficient: CPU-only misses idle resource holding; wall-clock-only
126//! is unfair to legitimately I/O-heavy workloads.
127//!
128//! # Runtime Configuration
129//!
130//! The shared async runtime thread count can be configured via environment variable:
131//!
132//! ```bash
133//! export HYPERLIGHT_MONITOR_THREADS=4  # Default is 2
134//! ```
135//!
136//! See the `runtime` module for details on the shared runtime.
137
138use std::future::Future;
139use std::pin::Pin;
140
141use hyperlight_host::Result;
142
143use crate::sandbox::metrics::{METRIC_MONITOR_TERMINATIONS, METRIC_MONITOR_TYPE_LABEL};
144
145/// Record that a monitor triggered execution termination.
146///
147/// Emits the `monitor_terminations_total` counter metric with the winning
148/// monitor's name as the `monitor_type` label, and logs a warning.
149pub(crate) fn record_monitor_triggered(triggered_by: &'static str) {
150    metrics::counter!(
151        METRIC_MONITOR_TERMINATIONS,
152        METRIC_MONITOR_TYPE_LABEL => triggered_by
153    )
154    .increment(1);
155
156    tracing::warn!("Monitor '{triggered_by}' fired — requesting execution termination");
157}
158
/// A monitor that enforces execution limits on handler invocations.
///
/// Implementations watch handler execution and signal termination when limits
/// are exceeded (time limits, CPU usage, resource quotas, etc.).
///
/// This is the **only trait users need to implement**. The sealed [`MonitorSet`]
/// trait is automatically derived via a blanket impl. See the
/// [module docs](self) for the full architecture rationale.
///
/// # Why `fn` returning `impl Future` instead of `async fn`
///
/// The method body executes synchronously on the **calling thread** and returns
/// an opaque `Future` that will be spawned on the shared monitor runtime.
/// This two-phase design lets monitors capture thread-local state (e.g.,
/// [`CpuTimeMonitor`]'s `pthread_getcpuclockid`) before the future migrates
/// to a tokio worker thread.
///
/// # Contract
///
/// - **Method body** (sync): Runs on the calling thread. Capture thread-local
///   state here. Return `Err` to fail closed (handler never runs).
/// - **Returned future** (async): Will be spawned on the monitor runtime. Stays pending
///   while within limits. **Completes when execution should be terminated.**
///   Will be aborted if the handler finishes first.
///
/// # Example
///
/// ```text
/// use hyperlight_js::ExecutionMonitor;
/// use hyperlight_host::Result;
/// use std::future::Future;
///
/// struct TimeoutMonitor { timeout: std::time::Duration }
///
/// impl ExecutionMonitor for TimeoutMonitor {
///     fn get_monitor(&self) -> Result<impl Future<Output = ()> + Send + 'static> {
///         let timeout = self.timeout;
///         Ok(async move {
///             hyperlight_js::monitor::sleep(timeout).await;
///             tracing::warn!("Timeout exceeded");
///         })
///     }
///
///     fn name(&self) -> &'static str { "timeout" }
/// }
/// ```
pub trait ExecutionMonitor: Send + Sync {
    /// Prepare and return a monitoring future for a single handler invocation.
    ///
    /// The method body runs synchronously on the calling thread — use it to
    /// capture thread-local state (e.g., CPU clock handles). The returned
    /// future will be spawned on the shared monitor runtime.
    ///
    /// The future should stay pending while execution is within limits and
    /// complete (return `()`) when execution should be terminated. It will
    /// be aborted if the handler finishes normally before the monitor fires.
    ///
    /// # Errors
    ///
    /// Return `Err` if the monitor cannot initialize (e.g., OS API failure).
    /// This will prevent the handler from executing (fail-closed semantics).
    fn get_monitor(&self) -> Result<impl Future<Output = ()> + Send + 'static>;

    /// Human-readable name for logging and metrics.
    ///
    /// Used as the `monitor_type` metric label and in the termination warning
    /// log, so it should be stable and distinguishable within a monitor set.
    fn name(&self) -> &'static str;
}
225
// =============================================================================
// MonitorSet — sealed composition trait
// =============================================================================
// See module-level docs ("Architecture — Why two traits?") for the full rationale.
// In short: keeps ExecutionMonitor clean (two methods, no composition) while
// giving the orchestrator a single racing future with metrics baked in.

/// Prevents external crates from implementing [`MonitorSet`] directly.
///
/// Uses the [sealed trait pattern](https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed).
mod private {
    /// Marker supertrait: only this crate can implement it, so only this
    /// crate can provide `MonitorSet` impls (blanket + tuple impls below).
    pub trait Sealed {}
}
239
/// A composable set of monitors that produces a single racing future.
///
/// This trait is **sealed** — you cannot implement it directly. It is
/// automatically derived for:
///
/// - Any type that implements [`ExecutionMonitor`] (wraps the single future)
/// - Tuples of up to 5 `ExecutionMonitor` implementors (races via `tokio::select!`)
///
/// The orchestration layer (`handle_event_with_monitor`) bounds on
/// `M: MonitorSet` and calls [`to_race()`](MonitorSet::to_race) to get
/// a single future that completes when the first monitor fires.
pub trait MonitorSet: private::Sealed + Send + Sync {
    /// Produce a single future that races all monitors in this set.
    ///
    /// Each sub-monitor's `get_monitor()` is called on the **calling thread**
    /// so monitors can capture thread-local state (e.g., CPU clock handles).
    /// The returned future completes when the first monitor fires, returning
    /// the winning monitor's name for metrics, logging, and stats.
    ///
    /// # Errors
    ///
    /// Propagates the first `Err` from any sub-monitor's `get_monitor()`,
    /// preserving the fail-closed semantics described in the module docs.
    fn to_race(&self) -> Result<Pin<Box<dyn Future<Output = &'static str> + Send>>>;
}
260
261// Every ExecutionMonitor is automatically a MonitorSet of one.
262impl<M: ExecutionMonitor> private::Sealed for M {}
263
264impl<M: ExecutionMonitor> MonitorSet for M {
265    fn to_race(&self) -> Result<Pin<Box<dyn Future<Output = &'static str> + Send>>> {
266        let future = self.get_monitor()?;
267        let name = self.name();
268        Ok(Box::pin(async move {
269            future.await;
270            name
271        }))
272    }
273}
274
// =============================================================================
// Tuple composition — OR semantics via tokio::select!
// =============================================================================

/// Generates a [`MonitorSet`] impl for a tuple of N `ExecutionMonitor`s.
///
/// Each sub-monitor's `get_monitor()` runs on the calling thread (preserving
/// thread-local state). The generated `to_race()` uses `tokio::select!` to
/// race all futures. The tuple is NOT an `ExecutionMonitor` — it's a composition that
/// satisfies `MonitorSet` directly.
///
/// Macro input: a parenthesized list of `binding: TypeParam` pairs, e.g.
/// `(m0: M0, m1: M1)` — one pair per tuple element.
macro_rules! impl_monitor_set_tuple {
    (($($p:ident: $P:ident),+)) => {
        impl<$($P: ExecutionMonitor),+> private::Sealed for ($($P,)+) {}

        impl<$($P: ExecutionMonitor),+> MonitorSet for ($($P,)+) {
            fn to_race(&self) -> Result<Pin<Box<dyn Future<Output = &'static str> + Send>>> {
                let ($($p,)+) = &self;
                // Each get_monitor() runs here on the calling thread,
                // preserving thread-local state (e.g. CPU clock handles).
                // The `?` means any single init failure aborts the whole
                // set (fail-closed: the handler never runs).
                $(let $p = ($p.get_monitor()?, $p.name());)+

                Ok(Box::pin(async move {
                    // Race all monitors — first to complete wins; its name
                    // (the `.1` of the pair) becomes the future's output.
                    tokio::select! {
                        $(_ = $p.0 => $p.1,)+
                    }
                }))
            }
        }
    };
}
306
// 1-tuple: not strictly necessary (bare `M: ExecutionMonitor` satisfies
// `MonitorSet` via the blanket impl), but a user might write `(monitor,)`
// and expect it to compile. No conflict with the blanket — `(T,)` and `T`
// are distinct types in Rust.
impl_monitor_set_tuple!((m0: M0));
impl_monitor_set_tuple!((m0: M0, m1: M1));
impl_monitor_set_tuple!((m0: M0, m1: M1, m2: M2));
impl_monitor_set_tuple!((m0: M0, m1: M1, m2: M2, m3: M3));
// Cap at 5: matches the "up to 5 monitors" promise in the module docs.
impl_monitor_set_tuple!((m0: M0, m1: M1, m2: M2, m3: M3, m4: M4));
316
// Feature-gated monitor implementations
#[cfg(feature = "monitor-wall-clock")]
mod wall_clock;
#[cfg(feature = "monitor-wall-clock")]
pub use wall_clock::WallClockMonitor;

// pub(crate): the CPU-time internals are also used elsewhere in the crate,
// while only the monitor type itself is re-exported publicly.
#[cfg(feature = "monitor-cpu-time")]
pub(crate) mod cpu_time;
#[cfg(feature = "monitor-cpu-time")]
pub use cpu_time::CpuTimeMonitor;

// Shared runtime for monitor orchestration
pub(crate) mod runtime;

/// Async sleep function used by monitors.
///
/// Re-exported here so that custom monitor implementations don't couple
/// directly to `tokio`.  If the underlying async runtime changes in a
/// future release, only this re-export needs updating — downstream
/// monitors remain source-compatible.
pub use tokio::time::sleep;