Skip to main content

sim_codec/implementation/
limits.rs

1//! Decode resource limits and budgets shared by every codec decode path.
2//!
3//! Defines `DecodeLimits` (the resource ceilings applied to untrusted input),
4//! the per-decode `DecodeBudget` counters, and the `ReadCx` decode context.
5
6use sim_kernel::{CodecId, Cx, Error, ReadPolicy, Result};
7
/// Resource ceiling shared by every codec decode path.
///
/// These bounds are applied to untrusted input before bulk allocation where
/// possible. They are deliberately generous for normal data and only reject
/// pathological input.
///
/// Every bound is inclusive: [`DecodeBudget`] rejects a measurement only when
/// it is strictly greater than the corresponding field, so a value exactly
/// equal to the limit is accepted. The stock values come from the
/// [`Default`] impl.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct DecodeLimits {
    /// Maximum total input size, in bytes
    /// (see [`DecodeBudget::check_input_bytes`]).
    pub max_input_bytes: usize,
    /// Maximum number of tokens the decoder may produce
    /// (see [`DecodeBudget::check_tokens`]).
    pub max_tokens: usize,
    /// Maximum number of `Expr` nodes in the decoded result. Charged one node
    /// at a time by [`DecodeBudget::enter_node`].
    pub max_expr_nodes: usize,
    /// Maximum nesting depth. Compared against the `depth` argument handed to
    /// [`DecodeBudget::enter_node`].
    pub max_depth: usize,
    /// Maximum length of a single decoded string, in bytes
    /// (see [`DecodeBudget::check_string_bytes`]).
    pub max_string_bytes: usize,
    /// Maximum length of a single decoded byte blob, in bytes
    /// (see [`DecodeBudget::check_blob_bytes`]).
    pub max_blob_bytes: usize,
    /// Maximum length of a single decoded collection (list, vector, map, set)
    /// (see [`DecodeBudget::check_collection_len`]).
    pub max_collection_len: usize,
    /// Maximum number of trivia items (comments, whitespace) retained.
    /// Charged one item at a time by [`DecodeBudget::add_trivia`].
    pub max_trivia_items: usize,
}
32
33impl Default for DecodeLimits {
34    fn default() -> Self {
35        Self {
36            max_input_bytes: 8 * 1024 * 1024,
37            max_tokens: 1_000_000,
38            max_expr_nodes: 200_000,
39            max_depth: 512,
40            max_string_bytes: 256 * 1024,
41            max_blob_bytes: 8 * 1024 * 1024,
42            max_collection_len: 65_536,
43            max_trivia_items: 16_384,
44        }
45    }
46}
47
48/// Running counters for a single decode. Construct one per decode call.
49pub struct DecodeBudget {
50    limits: DecodeLimits,
51    nodes: usize,
52    trivia: usize,
53}
54
55impl DecodeBudget {
56    /// Create a fresh budget with zeroed counters for the given `limits`.
57    pub fn new(limits: DecodeLimits) -> Self {
58        Self {
59            limits,
60            nodes: 0,
61            trivia: 0,
62        }
63    }
64
65    /// The [`DecodeLimits`] this budget enforces.
66    pub fn limits(&self) -> DecodeLimits {
67        self.limits
68    }
69
70    /// Check input size against [`DecodeLimits::max_input_bytes`].
71    pub fn check_input_bytes(&self, codec: CodecId, len: usize) -> Result<()> {
72        self.check(codec, "input bytes", len, self.limits.max_input_bytes)
73    }
74
75    /// Check token count against [`DecodeLimits::max_tokens`].
76    pub fn check_tokens(&self, codec: CodecId, count: usize) -> Result<()> {
77        self.check(codec, "tokens", count, self.limits.max_tokens)
78    }
79
80    /// Check collection length against [`DecodeLimits::max_collection_len`].
81    pub fn check_collection_len(&self, codec: CodecId, len: usize) -> Result<()> {
82        self.check(
83            codec,
84            "collection length",
85            len,
86            self.limits.max_collection_len,
87        )
88    }
89
90    /// Check string length against [`DecodeLimits::max_string_bytes`].
91    pub fn check_string_bytes(&self, codec: CodecId, len: usize) -> Result<()> {
92        self.check(codec, "string bytes", len, self.limits.max_string_bytes)
93    }
94
95    /// Check blob length against [`DecodeLimits::max_blob_bytes`].
96    pub fn check_blob_bytes(&self, codec: CodecId, len: usize) -> Result<()> {
97        self.check(codec, "blob bytes", len, self.limits.max_blob_bytes)
98    }
99
100    /// Charge one trivia item and check the running total against
101    /// [`DecodeLimits::max_trivia_items`].
102    pub fn add_trivia(&mut self, codec: CodecId) -> Result<()> {
103        self.trivia += 1;
104        self.check(
105            codec,
106            "trivia items",
107            self.trivia,
108            self.limits.max_trivia_items,
109        )
110    }
111
112    /// Charge one `Expr` node and check both the running node count against
113    /// [`DecodeLimits::max_expr_nodes`] and `depth` against
114    /// [`DecodeLimits::max_depth`].
115    pub fn enter_node(&mut self, codec: CodecId, depth: usize) -> Result<()> {
116        self.nodes += 1;
117        self.check(codec, "expr nodes", self.nodes, self.limits.max_expr_nodes)?;
118        self.check(codec, "recursion depth", depth, self.limits.max_depth)
119    }
120
121    fn check(&self, codec: CodecId, what: &str, got: usize, max: usize) -> Result<()> {
122        if got > max {
123            return Err(Error::CodecError {
124                codec,
125                message: format!("decode {what} limit exceeded: {got} > {max}"),
126            });
127        }
128        Ok(())
129    }
130}
131
/// The decode context threaded through every [`Decoder`](crate::Decoder): the
/// kernel context plus the active codec id, read policy, and resource limits.
///
/// The kernel context is held as an exclusive borrow for `'a`; the other
/// fields are stored by value.
pub struct ReadCx<'a> {
    /// The kernel context the decode runs against.
    pub cx: &'a mut Cx,
    /// Id of the codec performing the decode (used to tag errors).
    pub codec: CodecId,
    /// The read policy governing what the decode may admit.
    pub read_policy: ReadPolicy,
    /// Resource ceilings applied to this decode.
    // NOTE(review): nothing in this file turns `limits` into a `DecodeBudget`;
    // presumably each decoder constructs one per decode call — confirm.
    pub limits: DecodeLimits,
}