regorus 0.9.1

A fast, lightweight Rego (OPA policy language) interpreter
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use crate::rvm::program::Program;
#[cfg(feature = "allocator-memory-limits")]
use crate::utils::limits;
use crate::utils::limits::{
    fallback_execution_timer_config, monotonic_now, ExecutionTimer, ExecutionTimerConfig,
    LimitError,
};
use crate::value::Value;
use crate::CompiledPolicy;
use alloc::collections::{btree_map::Entry, BTreeMap, VecDeque};
#[cfg(feature = "allocator-memory-limits")]
use alloc::format;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec;
use alloc::vec::Vec;
use core::time::Duration;

use super::context::{CallRuleContext, ComprehensionContext, LoopContext};
use super::errors::{Result, VmError};
use super::execution_model::{
    BreakpointSet, ExecutionMode, ExecutionStack, ExecutionState, SuspendReason,
};

/// The Rego Virtual Machine
#[derive(Debug)]
pub struct RegoVM {
    /// Registers for storing values during execution
    pub(super) registers: Vec<Value>,

    /// Program counter
    pub(super) pc: usize,

    /// The compiled program containing instructions, literals, and metadata
    pub(super) program: Arc<Program>,

    /// Reference to the compiled policy for default rule access
    pub(super) compiled_policy: Option<CompiledPolicy>,

    /// Rule execution cache: rule_index -> (computed: bool, result: Value)
    pub(super) rule_cache: Vec<(bool, Value)>,

    /// Global data object
    pub(super) data: Value,

    /// Global input object
    pub(super) input: Value,

    /// Loop execution stack
    /// Note: Loops are either at the outermost level (rule body) or within the topmost comprehension.
    /// Loops never contain comprehensions - it's always the other way around.
    pub(super) loop_stack: Vec<LoopContext>,

    /// Call rule execution stack for managing nested rule calls
    pub(super) call_rule_stack: Vec<CallRuleContext>,

    /// Register stack for isolated register spaces during rule calls
    pub(super) register_stack: Vec<Vec<Value>>,

    /// Comprehension execution stack for tracking active comprehensions
    /// Note: Comprehensions can be nested within each other, forming a proper nesting hierarchy.
    /// Any loops within a comprehension belong to the topmost (current) comprehension context.
    pub(super) comprehension_stack: Vec<ComprehensionContext>,

    /// Base register window size for the main execution context
    pub(super) base_register_count: usize,

    /// Object pools for performance optimization
    /// Pool of register windows for reuse during rule calls
    pub(super) register_window_pool: Vec<Vec<Value>>,

    /// Maximum number of instructions to execute (default: 25000)
    pub(super) max_instructions: usize,

    /// Current count of executed instructions
    pub(super) executed_instructions: usize,

    /// Cache for evaluated paths in virtual data document lookup
    /// Structure: evaluated[path_component1][path_component2]...[Undefined] = result_value
    pub(super) evaluated: Value,

    /// Counter for cache hits during virtual data document lookup evaluation
    pub(super) cache_hits: usize,

    /// Explicit execution stack used when running in suspendable mode
    pub(super) execution_stack: ExecutionStack,

    /// Current execution state of the VM
    pub(super) execution_state: ExecutionState,

    /// Active breakpoints for the suspendable engine
    pub(super) breakpoints: BreakpointSet,

    /// Flag indicating whether single-step mode is active
    pub(super) step_mode: bool,

    /// Preloaded responses for HostAwait in run-to-completion execution keyed by identifier
    pub(super) host_await_responses: BTreeMap<Value, VecDeque<Value>>,

    /// Current execution mode (run-to-completion vs suspendable)
    pub(super) execution_mode: ExecutionMode,

    /// Tracks whether the current top-of-stack frame PC was explicitly set by an instruction
    pub(super) frame_pc_overridden: bool,

    /// Whether builtins should raise errors strictly or return undefined on failure
    pub(super) strict_builtin_errors: bool,

    /// Cache for builtin calls that must stay deterministic across a single evaluation
    pub(super) builtins_cache: BTreeMap<(&'static str, Vec<Value>), Value>,

    /// Optional override for the execution timer configuration
    pub(super) execution_timer_config: Option<ExecutionTimerConfig>,

    /// Cooperative execution timer used to enforce wall-clock limits
    pub(super) execution_timer: ExecutionTimer,

    /// Elapsed wall-clock time recorded when the VM entered a suspended state
    pub(super) execution_timer_elapsed_at_suspend: Option<Duration>,
}

impl Default for RegoVM {
    fn default() -> Self {
        Self::new()
    }
}

impl RegoVM {
    /// Create a new virtual machine
    pub fn new() -> Self {
        let fallback_timer = fallback_execution_timer_config();

        RegoVM {
            registers: Vec::new(), // Start with no registers - will be resized when program is loaded
            pc: 0,
            program: Arc::new(Program::default()),
            compiled_policy: None,
            rule_cache: Vec::new(),
            data: Value::Null,
            input: Value::Null,
            loop_stack: Vec::new(),
            call_rule_stack: Vec::new(),
            register_stack: Vec::new(),
            comprehension_stack: Vec::new(),
            base_register_count: 2, // Default to 2 registers for basic operations
            register_window_pool: Vec::new(), // Initialize register window pool
            max_instructions: 25000, // Default maximum instruction limit
            executed_instructions: 0,
            evaluated: Value::new_object(), // Initialize evaluation cache
            cache_hits: 0,                  // Initialize cache hit counter
            execution_stack: ExecutionStack::new(),
            execution_state: ExecutionState::Ready,
            breakpoints: BreakpointSet::new(),
            step_mode: false,
            host_await_responses: BTreeMap::new(),
            execution_mode: ExecutionMode::RunToCompletion,
            frame_pc_overridden: false,
            strict_builtin_errors: false,
            builtins_cache: BTreeMap::new(),
            execution_timer_config: None,
            execution_timer: ExecutionTimer::new(fallback_timer),
            execution_timer_elapsed_at_suspend: None,
        }
    }

    /// Create a new virtual machine with compiled policy for default rule support
    pub fn new_with_policy(compiled_policy: CompiledPolicy) -> Self {
        let mut vm = Self::new();
        vm.compiled_policy = Some(compiled_policy);
        vm
    }

    /// Load a complete program for execution
    pub fn load_program(&mut self, program: Arc<Program>) {
        self.program = program.clone();

        // Use the dispatch window size from the program for initial register allocation
        let dispatch_size = usize::from(program.dispatch_window_size).max(2); // Ensure at least 2 registers
        self.base_register_count = dispatch_size;

        // Resize registers to match program requirements
        self.registers.clear();
        self.registers.resize(dispatch_size, Value::Undefined);

        // Initialize rule cache
        self.rule_cache = vec![(false, Value::Undefined); program.rule_infos.len()];

        // Set PC to main entry point
        self.pc = usize::try_from(program.main_entry_point).unwrap_or(0);
        self.executed_instructions = 0; // Reset instruction counter
    }

    /// Set the compiled policy for default rule evaluation
    pub fn set_compiled_policy(&mut self, compiled_policy: CompiledPolicy) {
        self.compiled_policy = Some(compiled_policy);
    }

    /// Set the maximum number of instructions that can be executed
    pub const fn set_max_instructions(&mut self, max: usize) {
        self.max_instructions = max;
    }

    /// Set the base register count for the main execution context
    /// This determines how many registers are available in the root register window
    pub fn set_base_register_count(&mut self, count: usize) {
        self.base_register_count = count.max(1); // Ensure at least 1 register
        if !self.registers.is_empty() {
            self.registers
                .resize(self.base_register_count, Value::Undefined);
        }
    }

    /// Set the global data object
    pub fn set_data(&mut self, data: Value) -> Result<()> {
        // Check for conflicts between rule tree and data
        self.program.check_rule_data_conflicts(&data)?;

        self.data = data;
        Ok(())
    }

    /// Set the global input object
    pub fn set_input(&mut self, input: Value) {
        self.input = input;
    }

    /// Get the number of entry points available
    pub fn get_entry_point_count(&self) -> usize {
        self.program.entry_points.len()
    }

    /// Get all entry point names
    pub fn get_entry_point_names(&self) -> Vec<String> {
        self.program.entry_points.keys().cloned().collect()
    }

    // Public getters for visualization
    pub const fn get_pc(&self) -> usize {
        self.pc
    }

    pub const fn get_registers(&self) -> &Vec<Value> {
        &self.registers
    }

    pub const fn get_program(&self) -> &Arc<Program> {
        &self.program
    }

    pub const fn get_call_stack(&self) -> &Vec<CallRuleContext> {
        &self.call_rule_stack
    }

    pub const fn get_loop_stack(&self) -> &Vec<LoopContext> {
        &self.loop_stack
    }

    pub const fn get_cache_hits(&self) -> usize {
        self.cache_hits
    }

    /// Set the execution mode for the VM
    pub const fn set_execution_mode(&mut self, mode: ExecutionMode) {
        self.execution_mode = mode;
    }

    /// Configure whether builtin operations should raise errors strictly
    pub const fn set_strict_builtin_errors(&mut self, strict: bool) {
        self.strict_builtin_errors = strict;
    }

    /// Returns whether builtin operations raise errors strictly
    pub const fn strict_builtin_errors(&self) -> bool {
        self.strict_builtin_errors
    }

    /// Enable or disable single-step execution for suspendable runs
    pub const fn set_step_mode(&mut self, enabled: bool) {
        self.step_mode = enabled;
    }

    /// Configure the sequence of HostAwait responses for run-to-completion execution
    pub fn set_host_await_responses<I, J>(&mut self, responses: I)
    where
        I: IntoIterator<Item = (Value, J)>,
        J: IntoIterator<Item = Value>,
    {
        self.host_await_responses.clear();

        for (identifier, values) in responses {
            let mut queue = VecDeque::new();
            queue.extend(values);

            match self.host_await_responses.entry(identifier) {
                Entry::Vacant(entry) => {
                    entry.insert(queue);
                }
                Entry::Occupied(mut entry) => {
                    entry.get_mut().extend(queue);
                }
            }
        }
    }

    pub(super) fn next_host_await_response(
        &mut self,
        identifier: &Value,
        dest: u8,
    ) -> Result<Value> {
        let missing_error = || VmError::HostAwaitResponseMissing {
            dest,
            identifier: identifier.clone(),
            pc: self.pc,
        };

        let (response, should_remove) = {
            let queue = self
                .host_await_responses
                .get_mut(identifier)
                .ok_or_else(missing_error)?;

            let response = queue.pop_front().ok_or_else(missing_error)?;
            let should_remove = queue.is_empty();
            (response, should_remove)
        };

        if should_remove {
            self.host_await_responses.remove(identifier);
        }

        Ok(response)
    }

    /// Get the current execution mode
    pub const fn get_execution_mode(&self) -> ExecutionMode {
        self.execution_mode
    }

    /// Configure the execution timer to use the supplied configuration, or fall back to the global
    /// default when `None` is provided.
    pub fn set_execution_timer_config(&mut self, config: Option<ExecutionTimerConfig>) {
        self.execution_timer_config = config;
        self.reset_execution_timer_state();
    }

    /// Returns the currently configured execution timer, if any.
    pub const fn execution_timer_config(&self) -> Option<ExecutionTimerConfig> {
        self.execution_timer_config
    }

    pub(super) fn reset_execution_timer_state(&mut self) {
        let config = self.effective_execution_timer_config();
        self.execution_timer = ExecutionTimer::new(config);
        self.execution_timer_elapsed_at_suspend = None;

        if config.is_none() {
            return;
        }

        if let Some(now) = monotonic_now() {
            self.execution_timer.start(now);
        }
    }

    fn effective_execution_timer_config(&self) -> Option<ExecutionTimerConfig> {
        self.execution_timer_config
            .or_else(fallback_execution_timer_config)
    }

    pub(super) fn execution_timer_tick(&mut self, work_units: u32) -> Result<()> {
        if self.execution_timer.limit().is_none() {
            return Ok(());
        }

        let Some(now) = monotonic_now() else {
            return Ok(());
        };

        self.execution_timer
            .tick(work_units, now)
            .map_err(|err| match err {
                LimitError::TimeLimitExceeded { elapsed, limit } => VmError::TimeLimitExceeded {
                    elapsed,
                    limit,
                    pc: self.pc,
                },
                LimitError::MemoryLimitExceeded { usage, limit } => VmError::MemoryLimitExceeded {
                    usage,
                    limit,
                    pc: self.pc,
                },
            })
    }

    pub(super) fn snapshot_execution_timer_on_suspend(&mut self) {
        if self.execution_timer.config().is_none() {
            self.execution_timer_elapsed_at_suspend = None;
            return;
        }

        let Some(now) = monotonic_now() else {
            self.execution_timer_elapsed_at_suspend = None;
            return;
        };

        self.execution_timer_elapsed_at_suspend = self.execution_timer.elapsed(now);
    }

    pub(super) fn restore_execution_timer_after_resume(&mut self) {
        if self.execution_timer.config().is_none() {
            self.execution_timer_elapsed_at_suspend = None;
            return;
        }

        let Some(elapsed) = self.execution_timer_elapsed_at_suspend.take() else {
            return;
        };

        let Some(now) = monotonic_now() else {
            return;
        };

        self.execution_timer.resume_from_elapsed(now, elapsed);
    }

    /// Get the current execution state of the VM
    pub const fn execution_state(&self) -> &ExecutionState {
        &self.execution_state
    }

    /// Get the suspend reason if the VM is currently suspended
    pub const fn suspend_reason(&self) -> Option<&SuspendReason> {
        match self.execution_state {
            ExecutionState::Suspended { ref reason, .. } => Some(reason),
            _ => None,
        }
    }

    #[inline]
    #[allow(dead_code)]
    pub(super) fn get_register(&self, index: u8) -> Result<&Value> {
        self.registers
            .get(usize::from(index))
            .ok_or(VmError::RegisterIndexOutOfBounds {
                index,
                pc: self.pc,
                register_count: self.registers.len(),
            })
    }

    #[inline]
    #[allow(dead_code)]
    pub(super) fn set_register(&mut self, index: u8, value: Value) -> Result<()> {
        let register_count = self.registers.len();

        let slot = self.registers.get_mut(usize::from(index)).ok_or(
            VmError::RegisterIndexOutOfBounds {
                index,
                pc: self.pc,
                register_count,
            },
        )?;
        *slot = value;
        Ok(())
    }

    #[cfg(feature = "allocator-memory-limits")]
    pub(super) fn memory_check(&mut self) -> Result<()> {
        limits::check_memory_limit_if_needed().map_err(|err| match err {
            LimitError::MemoryLimitExceeded { usage, limit } => VmError::MemoryLimitExceeded {
                usage,
                limit,
                pc: self.pc,
            },
            other => VmError::Internal {
                message: format!("unexpected limit error: {other}"),
                pc: self.pc,
            },
        })
    }

    #[cfg(not(feature = "allocator-memory-limits"))]
    pub(super) fn memory_check(&mut self) -> Result<()> {
        Ok(())
    }
}