Skip to main content

ferrule_sql/
guard.rs

1//! Constant-memory size guards for the read paths.
2//!
3//! A pathological result — one 512 MB `bytea`/`TEXT` cell, an
4//! unexpectedly wide row, or a huge table the CLI tries to buffer whole
5//! for table rendering — can OOM-kill the process. These guards turn
6//! that into a clean, *bounded* failure: each cell and row is measured
7//! against a cap **before** it is retained, and the eager `query` path
8//! keeps a running byte tally so it aborts long before an unbounded
9//! `Vec<Row>` exhausts memory.
10//!
11//! **Constant-memory by construction.** This is deliberately *not* an
12//! RSS-accounting budget. [`Value::byte_size`](crate::value::Value::byte_size)
13//! is a cheap monotonic proxy; the guards are coarse ceilings checked
14//! incrementally so the checker itself never allocates proportional to
15//! the data it inspects. A `0` cap means "unlimited" for that dimension.
16
17use crate::error::SqlError;
18use crate::value::{ColumnInfo, Value};
19
/// Byte-size limits enforced on every read performed through a connection.
///
/// Each field is a byte count, and setting a field to `0` switches that
/// particular limit off. The [`Default`] values are sized so that
/// everyday OLTP rows pass untouched, while a runaway result is still
/// stopped well before memory runs out:
///
/// - `max_cell_bytes` (default 64 MiB) bounds the payload of one value.
/// - `max_row_bytes` (default 256 MiB) bounds the sum of the cell
///   payloads in one row.
/// - `max_total_buffered_bytes` (default 1 GiB) bounds how much an eager
///   [`query`](crate::Connection::query) may materialize in total before
///   it fails with [`SqlError::BufferTooLarge`]. The streaming cursor
///   never buffers a whole result, so it skips this total and enforces
///   only the per-cell and per-row limits.
///
/// Use [`SizeGuards::unlimited`] to disable everything at once, or set
/// individual fields. Every connection receives its own copy of the
/// guards when it connects and consults that copy for each row.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SizeGuards {
    /// Largest payload, in bytes, allowed for one cell (`0` = no limit).
    pub max_cell_bytes: usize,
    /// Largest summed cell payload, in bytes, allowed for one row
    /// (`0` = no limit).
    pub max_row_bytes: usize,
    /// Largest running total, in bytes, that an eager `query` may buffer
    /// (`0` = no limit). The streaming cursor, being bounded by design,
    /// does not consult this field.
    pub max_total_buffered_bytes: usize,
}
47
48impl Default for SizeGuards {
49    fn default() -> Self {
50        Self {
51            max_cell_bytes: 64 * 1024 * 1024,
52            max_row_bytes: 256 * 1024 * 1024,
53            max_total_buffered_bytes: 1024 * 1024 * 1024,
54        }
55    }
56}
57
58impl SizeGuards {
59    /// Disable every guard. Use only when the caller has its own bound
60    /// on result size and wants ferrule-sql to impose none.
61    #[must_use]
62    pub fn unlimited() -> Self {
63        Self {
64            max_cell_bytes: 0,
65            max_row_bytes: 0,
66            max_total_buffered_bytes: 0,
67        }
68    }
69
70    /// Check one fully-decoded row against the per-cell and per-row
71    /// caps. Returns a structured [`SqlError`] naming the offending
72    /// row/column the instant a cap is crossed, so the caller never
73    /// retains an over-budget row.
74    ///
75    /// `row_ordinal` is the 0-based position of this row within the
76    /// current result (used only for diagnostics). `columns` supplies
77    /// the column names for the [`SqlError::CellTooLarge`] message; when
78    /// shorter than the row (or empty) the column index is rendered
79    /// instead. This performs no allocation proportional to row size —
80    /// it sums precomputed [`Value::byte_size`] values.
81    ///
82    /// [`Value::byte_size`]: crate::value::Value::byte_size
83    pub fn check_row(
84        &self,
85        row_ordinal: u64,
86        row: &[Value],
87        columns: &[ColumnInfo],
88    ) -> Result<(), SqlError> {
89        let mut row_total: usize = 0;
90        for (i, cell) in row.iter().enumerate() {
91            let size = cell.byte_size();
92            if self.max_cell_bytes != 0 && size > self.max_cell_bytes {
93                return Err(SqlError::CellTooLarge {
94                    row: row_ordinal,
95                    column: column_label(columns, i),
96                    size,
97                    cap: self.max_cell_bytes,
98                });
99            }
100            row_total = row_total.saturating_add(size);
101            if self.max_row_bytes != 0 && row_total > self.max_row_bytes {
102                return Err(SqlError::RowTooLarge {
103                    row: row_ordinal,
104                    size: row_total,
105                    cap: self.max_row_bytes,
106                });
107            }
108        }
109        Ok(())
110    }
111
112    /// True when this configuration buffers an eager result under a
113    /// finite total cap. The eager `query` path uses this to decide
114    /// whether to maintain the running byte tally at all.
115    #[must_use]
116    pub fn caps_total(&self) -> bool {
117        self.max_total_buffered_bytes != 0
118    }
119}
120
121/// Render a column label for a [`SqlError::CellTooLarge`] message:
122/// the declared column name when available, otherwise the bare index.
123fn column_label(columns: &[ColumnInfo], idx: usize) -> String {
124    columns
125        .get(idx)
126        .map(|c| c.name.clone())
127        .unwrap_or_else(|| format!("#{idx}"))
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::TypeHint;

    /// Build a nullable column of unspecified type with the given name.
    fn col(name: &str) -> ColumnInfo {
        ColumnInfo {
            name: name.to_string(),
            type_hint: TypeHint::Other,
            nullable: true,
        }
    }

    /// Guards with only the per-cell and per-row caps set; the total
    /// cap is disabled because `check_row` never consults it.
    fn guards(max_cell_bytes: usize, max_row_bytes: usize) -> SizeGuards {
        SizeGuards {
            max_cell_bytes,
            max_row_bytes,
            max_total_buffered_bytes: 0,
        }
    }

    #[test]
    fn unlimited_passes_everything() {
        let g = SizeGuards::unlimited();
        let row = vec![Value::Bytes(vec![0u8; 10_000_000]), Value::Int64(1)];
        g.check_row(0, &row, &[col("blob"), col("n")])
            .expect("unlimited guards never trip");
    }

    #[test]
    fn oversized_cell_fails_with_column_name() {
        let g = guards(1024, 0);
        let row = vec![Value::Int64(7), Value::String("x".repeat(2048))];
        let err = g
            .check_row(3, &row, &[col("id"), col("payload")])
            .expect_err("oversized cell must fail");
        match err {
            SqlError::CellTooLarge {
                row,
                column,
                size,
                cap,
            } => {
                assert_eq!(row, 3);
                assert_eq!(column, "payload");
                assert_eq!(size, 2048);
                assert_eq!(cap, 1024);
            }
            other => panic!("expected CellTooLarge, got {other:?}"),
        }
    }

    #[test]
    fn oversized_row_fails_even_when_each_cell_fits() {
        // Each cell is 600 bytes (under the 1000-byte cell cap) but the
        // row sum (1200) crosses the 1000-byte row cap.
        let g = guards(1000, 1000);
        let row = vec![
            Value::String("a".repeat(600)),
            Value::String("b".repeat(600)),
        ];
        let err = g
            .check_row(0, &row, &[col("a"), col("b")])
            .expect_err("oversized row must fail");
        // Pin the reported running sum and cap, not just the variant.
        assert!(matches!(
            err,
            SqlError::RowTooLarge {
                row: 0,
                size: 1200,
                cap: 1000
            }
        ));
    }

    #[test]
    fn cell_label_falls_back_to_index_without_metadata() {
        let g = guards(8, 0);
        let row = vec![Value::String("too long".repeat(4))];
        let err = g.check_row(0, &row, &[]).expect_err("must fail");
        match err {
            SqlError::CellTooLarge { column, .. } => assert_eq!(column, "#0"),
            other => panic!("expected CellTooLarge, got {other:?}"),
        }
    }

    #[test]
    fn cell_exactly_at_cap_passes() {
        // The caps are inclusive: only a size strictly greater than the
        // cap is rejected. A 1024-byte string reports 1024 bytes, in
        // line with the 2048-byte assertion in the oversized-cell test.
        let g = guards(1024, 0);
        let row = vec![Value::String("x".repeat(1024))];
        g.check_row(0, &row, &[col("payload")])
            .expect("a cell equal to the cap is within budget");
    }

    #[test]
    fn row_exactly_at_cap_passes() {
        let g = guards(0, 1000);
        let row = vec![
            Value::String("a".repeat(500)),
            Value::String("b".repeat(500)),
        ];
        g.check_row(0, &row, &[col("a"), col("b")])
            .expect("a row equal to the cap is within budget");
    }

    #[test]
    fn empty_row_always_passes() {
        guards(1, 1)
            .check_row(0, &[], &[])
            .expect("an empty row has nothing to reject");
    }

    #[test]
    fn caps_total_reflects_total_cap() {
        assert!(SizeGuards::default().caps_total());
        assert!(!SizeGuards::unlimited().caps_total());
        assert_ne!(SizeGuards::default(), SizeGuards::unlimited());
    }
}
211}