Skip to main content

nodedb_query/msgpack_scan/
field.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Field extraction from raw MessagePack maps.
4//!
5//! Given a `&[u8]` containing a MessagePack map, extract the byte range
6//! of a value for a given key — without allocating or decoding.
7
8use crate::msgpack_scan::reader::{map_header, skip_value, str_bounds};
9
/// A byte range `(start, end)` within a MessagePack buffer, pointing to
/// a complete value (tag + payload). `end` is exclusive, so the value's raw
/// bytes are `buf[start..end]`. Use `read_f64`, `read_i64`, `read_str`
/// etc. with `range.0` as the offset to decode the value.
pub type FieldRange = (usize, usize);
14
15/// Locate the value for `field` in a MessagePack map starting at `offset`.
16/// Returns the byte range `(value_start, value_end)` of the matched value.
17///
18/// Scans map keys sequentially — O(n) in number of keys. For documents
19/// with many fields queried repeatedly, see structural indexing (Phase 8).
20///
21/// # Returns
22/// - `Some((start, end))` — the value's byte range (use offset `start` with readers)
23/// - `None` — field not found, or buffer is not a valid map
24pub fn extract_field(buf: &[u8], offset: usize, field: &str) -> Option<FieldRange> {
25    let (count, mut pos) = map_header(buf, offset)?;
26    let field_bytes = field.as_bytes();
27
28    for _ in 0..count {
29        // Read key string bounds
30        let key_match = match str_bounds(buf, pos) {
31            Some((start, len)) => buf
32                .get(start..start + len)
33                .map(|kb| kb == field_bytes)
34                .unwrap_or(false),
35            None => false,
36        };
37
38        // Skip past the key
39        pos = skip_value(buf, pos)?;
40
41        if key_match {
42            // Found — return the value's byte range
43            let value_start = pos;
44            let value_end = skip_value(buf, pos)?;
45            return Some((value_start, value_end));
46        }
47
48        // Skip the value
49        pos = skip_value(buf, pos)?;
50    }
51
52    None
53}
54
55/// Extract a value at a nested path (e.g., `["address", "city"]`).
56/// Each segment must be a string key in a nested map.
57pub fn extract_path(buf: &[u8], offset: usize, path: &[&str]) -> Option<FieldRange> {
58    if path.is_empty() {
59        return None;
60    }
61
62    let mut current_offset = offset;
63    for (i, segment) in path.iter().enumerate() {
64        let (value_start, value_end) = extract_field(buf, current_offset, segment)?;
65        if i == path.len() - 1 {
66            return Some((value_start, value_end));
67        }
68        // Intermediate segments must be maps — descend into the value
69        current_offset = value_start;
70    }
71
72    None
73}
74
75/// Extract a value using a dot-separated path string (e.g., `"address.city"`).
76/// Convenience wrapper over `extract_path`.
77pub fn extract_dot_path(buf: &[u8], offset: usize, dot_path: &str) -> Option<FieldRange> {
78    let segments: Vec<&str> = dot_path.split('.').collect();
79    extract_path(buf, offset, &segments)
80}
81
#[cfg(test)]
mod tests {
    use super::*;
    use crate::msgpack_scan::reader::{read_f64, read_i64, read_str};
    use serde_json::json;

    /// Encode a JSON value to MessagePack bytes for use as test input.
    fn encode(v: &serde_json::Value) -> Vec<u8> {
        nodedb_types::json_msgpack::json_to_msgpack(v).expect("encode")
    }

    #[test]
    fn extract_integer_field() {
        let buf = encode(&json!({"age": 25}));
        let (start, _end) = extract_field(&buf, 0, "age").unwrap();
        assert_eq!(read_i64(&buf, start), Some(25));
    }

    #[test]
    fn extract_string_field() {
        let buf = encode(&json!({"name": "alice"}));
        let (start, _end) = extract_field(&buf, 0, "name").unwrap();
        assert_eq!(read_str(&buf, start), Some("alice"));
    }

    #[test]
    fn extract_float_field() {
        let buf = encode(&json!({"score": 99.5}));
        let (start, _end) = extract_field(&buf, 0, "score").unwrap();
        assert_eq!(read_f64(&buf, start), Some(99.5));
    }

    #[test]
    fn extract_missing_field() {
        let buf = encode(&json!({"x": 1}));
        assert!(extract_field(&buf, 0, "y").is_none());
    }

    #[test]
    fn extract_multiple_fields() {
        let buf = encode(&json!({"a": 10, "b": 20, "c": 30}));

        let (s, _) = extract_field(&buf, 0, "a").unwrap();
        assert_eq!(read_i64(&buf, s), Some(10));

        let (s, _) = extract_field(&buf, 0, "b").unwrap();
        assert_eq!(read_i64(&buf, s), Some(20));

        let (s, _) = extract_field(&buf, 0, "c").unwrap();
        assert_eq!(read_i64(&buf, s), Some(30));
    }

    /// Keys that are not strings can never match and must be stepped over
    /// without derailing the scan.
    #[test]
    fn extract_skips_non_string_keys() {
        // Hand-encoded fixmap(2): { 1: 2, "k": 3 }
        let buf = [0x82u8, 0x01, 0x02, 0xa1, b'k', 0x03];
        assert_eq!(extract_field(&buf, 0, "k"), Some((5, 6)));
        assert_eq!(read_i64(&buf, 5), Some(3));
    }

    #[test]
    fn extract_nested_path() {
        let buf = encode(&json!({"address": {"city": "tokyo"}}));
        let (start, _end) = extract_path(&buf, 0, &["address", "city"]).unwrap();
        assert_eq!(read_str(&buf, start), Some("tokyo"));
    }

    #[test]
    fn extract_dot_path_works() {
        let buf = encode(&json!({"addr": {"zip": "10001"}}));
        let (start, _end) = extract_dot_path(&buf, 0, "addr.zip").unwrap();
        assert_eq!(read_str(&buf, start), Some("10001"));
    }

    #[test]
    fn extract_path_missing_intermediate() {
        let buf = encode(&json!({"x": 1}));
        assert!(extract_path(&buf, 0, &["x", "y"]).is_none());
    }

    #[test]
    fn extract_empty_path() {
        let buf = encode(&json!({}));
        assert!(extract_path(&buf, 0, &[]).is_none());
    }

    #[test]
    fn extract_from_large_map() {
        let mut map = serde_json::Map::new();
        for i in 0..20 {
            map.insert(format!("field_{i}"), json!(i));
        }
        let buf = encode(&serde_json::Value::Object(map));
        let (start, _end) = extract_field(&buf, 0, "field_9").unwrap();
        assert_eq!(read_i64(&buf, start), Some(9));
    }

    #[test]
    fn field_range_spans_entire_value() {
        let buf = encode(&json!({"data": [1, 2, 3]}));
        let (start, end) = extract_field(&buf, 0, "data").unwrap();
        // "data" is the only (hence last) entry of the map, so its range must
        // run to the end of the buffer, not stop after the array's tag byte.
        assert_eq!(end, buf.len());
        let value_bytes = &buf[start..end];
        assert!(value_bytes.len() > 1);
    }

    // ── Fuzz-style tests ───────────────────────────────────────────────────

    /// Truncate valid msgpack at every byte position — extract_field and
    /// extract_path must never panic. A lookup may still succeed when the
    /// requested entry lies entirely before the cut, so only the key that is
    /// absent from every document is asserted to be `None`.
    #[test]
    fn fuzz_truncated_buffers() {
        let docs = [
            json!({"name": "alice", "age": 30, "active": true}),
            json!({"address": {"city": "tokyo", "zip": "100-0001"}}),
            json!({"scores": [10, 20, 30], "ratio": 0.95}),
        ];

        for doc in &docs {
            let full = encode(doc);
            for truncate_at in 0..full.len() {
                let slice = &full[..truncate_at];
                let _ = extract_field(slice, 0, "name");
                let _ = extract_field(slice, 0, "age");
                assert_eq!(extract_field(slice, 0, "missing"), None);
                let _ = extract_path(slice, 0, &["address", "city"]);
                let _ = extract_dot_path(slice, 0, "address.city");
            }
        }
    }

    /// Deterministic random byte sequences — extract_field must never panic.
    #[test]
    fn fuzz_random_payloads() {
        // Small LCG so the "random" inputs are reproducible across runs.
        let mut state: u64 = 0xfeedface_0badf00d;
        let next = |s: &mut u64| -> u8 {
            *s = s
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            (*s >> 33) as u8
        };

        let mut buf = [0u8; 128];
        for _ in 0..1000 {
            let len = (next(&mut state) as usize % 128) + 1;
            for b in buf[..len].iter_mut() {
                *b = next(&mut state);
            }
            let slice = &buf[..len];
            let _ = extract_field(slice, 0, "key");
            let _ = extract_path(slice, 0, &["a", "b", "c"]);
            let _ = extract_dot_path(slice, 0, "x.y.z");
        }
    }

    /// Adversarial: map header claims huge element count but buffer is tiny.
    #[test]
    fn fuzz_adversarial_map_count() {
        // MAP32: tag 0xdf + 4-byte count claiming 0xffffffff pairs
        let buf = [0xdfu8, 0xff, 0xff, 0xff, 0xff];
        assert_eq!(extract_field(&buf, 0, "any"), None);

        // MAP16: tag 0xde + 2-byte count claiming 0xffff pairs
        let buf = [0xdeu8, 0xff, 0xff];
        assert_eq!(extract_field(&buf, 0, "any"), None);

        // Fixmap claims 15 pairs but is only 1 byte total
        let buf = [0x8fu8];
        assert_eq!(extract_field(&buf, 0, "key"), None);
    }

    /// Non-map input must return None for extract_field.
    #[test]
    fn fuzz_non_map_inputs() {
        let array_buf = encode(&json!([1, 2, 3]));
        assert_eq!(extract_field(&array_buf, 0, "x"), None);

        let int_buf = encode(&json!(42));
        assert_eq!(extract_field(&int_buf, 0, "x"), None);

        let str_buf = encode(&json!("hello"));
        assert_eq!(extract_field(&str_buf, 0, "x"), None);

        let nil_buf = [0xc0u8];
        assert_eq!(extract_field(&nil_buf, 0, "x"), None);
    }

    /// Out-of-bounds offset must return None.
    #[test]
    fn fuzz_out_of_bounds_offset() {
        let buf = encode(&json!({"a": 1}));
        assert_eq!(extract_field(&buf, buf.len() + 100, "a"), None);
        assert_eq!(extract_path(&buf, buf.len() + 100, &["a"]), None);
    }

    /// Empty path and empty buffer edge cases.
    #[test]
    fn fuzz_edge_cases() {
        // extract_path with empty path
        let buf = encode(&json!({"a": 1}));
        assert_eq!(extract_path(&buf, 0, &[]), None);

        // extract_dot_path with empty dot_path string
        assert_eq!(extract_dot_path(&buf, 0, ""), None);

        // Empty buffer
        assert_eq!(extract_field(&[], 0, "x"), None);
        assert_eq!(extract_path(&[], 0, &["x"]), None);
    }

    /// Deeply nested path that bottoms out at a non-map value returns None
    /// when trying to descend further.
    #[test]
    fn fuzz_path_descend_into_scalar() {
        let buf = encode(&json!({"a": 42}));
        // "a" is an integer, cannot descend into it
        assert_eq!(extract_path(&buf, 0, &["a", "b"]), None);
        assert_eq!(extract_dot_path(&buf, 0, "a.b"), None);
    }
}