Skip to main content

nodedb_query/msgpack_scan/
field.rs

//! Field extraction from raw MessagePack maps.
//!
//! Given a `&[u8]` containing a MessagePack map, extract the byte range
//! of a value for a given key — without allocating or decoding.
5
6use crate::msgpack_scan::reader::{map_header, skip_value, str_bounds};
7
/// A half-open byte range `(start, end)` within a MessagePack buffer that
/// covers one complete value (tag + payload).
///
/// `start` is the offset of the value's first byte and `end` is the offset
/// just past its last byte, so `&buf[start..end]` is the encoded value.
/// Use `read_f64`, `read_i64`, `read_str` etc. with `range.0` as the offset
/// to decode the value.
pub type FieldRange = (usize, usize);
12
13/// Locate the value for `field` in a MessagePack map starting at `offset`.
14/// Returns the byte range `(value_start, value_end)` of the matched value.
15///
16/// Scans map keys sequentially — O(n) in number of keys. For documents
17/// with many fields queried repeatedly, see structural indexing (Phase 8).
18///
19/// # Returns
20/// - `Some((start, end))` — the value's byte range (use offset `start` with readers)
21/// - `None` — field not found, or buffer is not a valid map
22pub fn extract_field(buf: &[u8], offset: usize, field: &str) -> Option<FieldRange> {
23    let (count, mut pos) = map_header(buf, offset)?;
24    let field_bytes = field.as_bytes();
25
26    for _ in 0..count {
27        // Read key string bounds
28        let key_match = match str_bounds(buf, pos) {
29            Some((start, len)) => buf
30                .get(start..start + len)
31                .map(|kb| kb == field_bytes)
32                .unwrap_or(false),
33            None => false,
34        };
35
36        // Skip past the key
37        pos = skip_value(buf, pos)?;
38
39        if key_match {
40            // Found — return the value's byte range
41            let value_start = pos;
42            let value_end = skip_value(buf, pos)?;
43            return Some((value_start, value_end));
44        }
45
46        // Skip the value
47        pos = skip_value(buf, pos)?;
48    }
49
50    None
51}
52
53/// Extract a value at a nested path (e.g., `["address", "city"]`).
54/// Each segment must be a string key in a nested map.
55pub fn extract_path(buf: &[u8], offset: usize, path: &[&str]) -> Option<FieldRange> {
56    if path.is_empty() {
57        return None;
58    }
59
60    let mut current_offset = offset;
61    for (i, segment) in path.iter().enumerate() {
62        let (value_start, value_end) = extract_field(buf, current_offset, segment)?;
63        if i == path.len() - 1 {
64            return Some((value_start, value_end));
65        }
66        // Intermediate segments must be maps — descend into the value
67        current_offset = value_start;
68    }
69
70    None
71}
72
73/// Extract a value using a dot-separated path string (e.g., `"address.city"`).
74/// Convenience wrapper over `extract_path`.
75pub fn extract_dot_path(buf: &[u8], offset: usize, dot_path: &str) -> Option<FieldRange> {
76    let segments: Vec<&str> = dot_path.split('.').collect();
77    extract_path(buf, offset, &segments)
78}
79
#[cfg(test)]
mod tests {
    use super::*;
    use crate::msgpack_scan::reader::{read_f64, read_i64, read_str};
    use serde_json::json;

    /// Encode a JSON value as MessagePack bytes; panics if encoding fails.
    fn encode(v: &serde_json::Value) -> Vec<u8> {
        let encoded = nodedb_types::json_msgpack::json_to_msgpack(v);
        encoded.expect("encode")
    }

    #[test]
    fn extract_integer_field() {
        let doc = encode(&json!({"age": 25}));
        let range = extract_field(&doc, 0, "age").unwrap();
        assert_eq!(read_i64(&doc, range.0), Some(25));
    }

    #[test]
    fn extract_string_field() {
        let doc = encode(&json!({"name": "alice"}));
        let range = extract_field(&doc, 0, "name").unwrap();
        assert_eq!(read_str(&doc, range.0), Some("alice"));
    }

    #[test]
    fn extract_float_field() {
        let doc = encode(&json!({"score": 99.5}));
        let range = extract_field(&doc, 0, "score").unwrap();
        assert_eq!(read_f64(&doc, range.0), Some(99.5));
    }

    #[test]
    fn extract_missing_field() {
        let doc = encode(&json!({"x": 1}));
        assert_eq!(extract_field(&doc, 0, "y"), None);
    }

    #[test]
    fn extract_multiple_fields() {
        let doc = encode(&json!({"a": 10, "b": 20, "c": 30}));

        for (key, expected) in [("a", 10), ("b", 20), ("c", 30)] {
            let (value_at, _) = extract_field(&doc, 0, key).unwrap();
            assert_eq!(read_i64(&doc, value_at), Some(expected));
        }
    }

    #[test]
    fn extract_nested_path() {
        let doc = encode(&json!({"address": {"city": "tokyo"}}));
        let range = extract_path(&doc, 0, &["address", "city"]).unwrap();
        assert_eq!(read_str(&doc, range.0), Some("tokyo"));
    }

    #[test]
    fn extract_dot_path_works() {
        let doc = encode(&json!({"addr": {"zip": "10001"}}));
        let range = extract_dot_path(&doc, 0, "addr.zip").unwrap();
        assert_eq!(read_str(&doc, range.0), Some("10001"));
    }

    #[test]
    fn extract_path_missing_intermediate() {
        let doc = encode(&json!({"x": 1}));
        assert_eq!(extract_path(&doc, 0, &["x", "y"]), None);
    }

    #[test]
    fn extract_empty_path() {
        let doc = encode(&json!({}));
        assert_eq!(extract_path(&doc, 0, &[]), None);
    }

    #[test]
    fn extract_from_large_map() {
        let fields: serde_json::Map<String, serde_json::Value> = (0..20)
            .map(|i| (format!("field_{i}"), json!(i)))
            .collect();
        let doc = encode(&serde_json::Value::Object(fields));

        let range = extract_field(&doc, 0, "field_9").unwrap();
        assert_eq!(read_i64(&doc, range.0), Some(9));
    }

    #[test]
    fn field_range_spans_entire_value() {
        let doc = encode(&json!({"data": [1, 2, 3]}));
        let (start, end) = extract_field(&doc, 0, "data").unwrap();
        // An array value must occupy more than just its tag byte.
        let encoded_value = &doc[start..end];
        assert!(encoded_value.len() > 1);
    }

    // ── Fuzz-style tests ───────────────────────────────────────────────────

    /// Cut valid msgpack short at every byte position and run each extractor
    /// on the prefix. Results are discarded: the test only checks that
    /// truncated input never causes a panic.
    #[test]
    fn fuzz_truncated_buffers() {
        let docs = [
            json!({"name": "alice", "age": 30, "active": true}),
            json!({"address": {"city": "tokyo", "zip": "100-0001"}}),
            json!({"scores": [10, 20, 30], "ratio": 0.95}),
        ];

        for full in docs.iter().map(encode) {
            for cut in 0..full.len() {
                let partial = &full[..cut];
                for key in ["name", "age", "missing"] {
                    let _ = extract_field(partial, 0, key);
                }
                let _ = extract_path(partial, 0, &["address", "city"]);
                let _ = extract_dot_path(partial, 0, "address.city");
            }
        }
    }

    /// Feed deterministic pseudo-random byte sequences to each extractor;
    /// none of them may panic.
    #[test]
    fn fuzz_random_payloads() {
        /// Advance the generator state and return the next pseudo-random byte.
        fn next_byte(state: &mut u64) -> u8 {
            *state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            (*state >> 33) as u8
        }

        let mut seed: u64 = 0xfeedface_0badf00d;
        let mut scratch = [0u8; 128];

        for _round in 0..1000 {
            // Length is drawn first, then exactly that many payload bytes.
            let len = usize::from(next_byte(&mut seed)) % 128 + 1;
            scratch[..len]
                .iter_mut()
                .for_each(|byte| *byte = next_byte(&mut seed));

            let payload = &scratch[..len];
            let _ = extract_field(payload, 0, "key");
            let _ = extract_path(payload, 0, &["a", "b", "c"]);
            let _ = extract_dot_path(payload, 0, "x.y.z");
        }
    }

    /// Adversarial: the map header announces far more pairs than the buffer
    /// could possibly hold.
    #[test]
    fn fuzz_adversarial_map_count() {
        let cases: [(&[u8], &str); 3] = [
            // MAP32: tag 0xdf + 4-byte count claiming 0xffffffff pairs
            (&[0xdf, 0xff, 0xff, 0xff, 0xff], "any"),
            // MAP16: tag 0xde + 2-byte count claiming 0xffff pairs
            (&[0xde, 0xff, 0xff], "any"),
            // Fixmap claims 15 pairs but is only 1 byte total
            (&[0x8f], "key"),
        ];

        for (buf, key) in cases {
            assert_eq!(extract_field(buf, 0, key), None);
        }
    }

    /// `extract_field` must return None when the input is not a map.
    #[test]
    fn fuzz_non_map_inputs() {
        let non_maps: [Vec<u8>; 4] = [
            encode(&json!([1, 2, 3])),
            encode(&json!(42)),
            encode(&json!("hello")),
            // nil
            vec![0xc0u8],
        ];

        for buf in &non_maps {
            assert_eq!(extract_field(buf, 0, "x"), None);
        }
    }

    /// An offset past the end of the buffer must return None.
    #[test]
    fn fuzz_out_of_bounds_offset() {
        let doc = encode(&json!({"a": 1}));
        let beyond = doc.len() + 100;
        assert_eq!(extract_field(&doc, beyond, "a"), None);
        assert_eq!(extract_path(&doc, beyond, &["a"]), None);
    }

    /// Edge cases: empty path, empty dot-path string, empty buffer.
    #[test]
    fn fuzz_edge_cases() {
        let doc = encode(&json!({"a": 1}));

        // No segments at all
        assert_eq!(extract_path(&doc, 0, &[]), None);

        // Empty dot-path string
        assert_eq!(extract_dot_path(&doc, 0, ""), None);

        // Nothing to read from
        assert_eq!(extract_field(&[], 0, "x"), None);
        assert_eq!(extract_path(&[], 0, &["x"]), None);
    }

    /// A path that reaches a non-map value and then tries to descend further
    /// must return None.
    #[test]
    fn fuzz_path_descend_into_scalar() {
        let doc = encode(&json!({"a": 42}));
        // "a" holds an integer, so there is nothing to look "b" up in.
        assert_eq!(extract_path(&doc, 0, &["a", "b"]), None);
        assert_eq!(extract_dot_path(&doc, 0, "a.b"), None);
    }
}