Skip to main content

wacore_binary/
marshal.rs

1use std::io::Write;
2
3use crate::{
4    BinaryError, Node, NodeRef, Result,
5    decoder::Decoder,
6    encoder::{Encoder, build_marshaled_node_plan, build_marshaled_node_ref_plan},
7    node::{NodeContent, NodeContentRef},
8};
9
/// Initial `Vec` capacity used by `marshal` / `marshal_ref`, and the base and
/// lower bound of the auto-strategy capacity estimates.
const DEFAULT_MARSHAL_CAPACITY: usize = 1024;
/// Attribute count at or above which the auto strategy preallocates.
const AUTO_RESERVE_ATTRS_THRESHOLD: usize = 24;
/// Child (or grandchild) list length at or above which the auto strategy preallocates.
const AUTO_RESERVE_CHILDREN_THRESHOLD: usize = 64;
/// Bytes/string content length at or above which the auto strategy preallocates.
const AUTO_RESERVE_SCALAR_THRESHOLD: usize = 8 * 1024;
/// Maximum number of direct children inspected while estimating capacity.
const AUTO_CHILD_SAMPLE_LIMIT: usize = 32;
/// Upper bound on any capacity hint produced by the estimators.
const AUTO_MAX_HINT_CAPACITY: usize = 512 * 1024;
/// Estimated encoded bytes contributed per attribute.
const AUTO_ATTR_ESTIMATE: usize = 24;
/// Estimated encoded bytes contributed per direct child.
const AUTO_CHILD_ESTIMATE: usize = 96;
/// Estimated encoded bytes contributed per grandchild of a sampled child.
const AUTO_GRANDCHILD_ESTIMATE: usize = 40;
19
20pub fn unmarshal_ref(data: &[u8]) -> Result<NodeRef<'_>> {
21    let mut decoder = Decoder::new(data);
22    let node = decoder.read_node_ref()?;
23
24    if decoder.is_finished() {
25        Ok(node)
26    } else {
27        Err(BinaryError::LeftoverData(decoder.bytes_left()))
28    }
29}
30
31pub fn marshal_to(node: &Node, writer: &mut impl Write) -> Result<()> {
32    let mut encoder = Encoder::new(writer)?;
33    encoder.write_node(node)?;
34    Ok(())
35}
36
37/// Serialize an owned node directly into a `Vec<u8>` using the fast vec writer path.
38pub fn marshal_to_vec(node: &Node, output: &mut Vec<u8>) -> Result<()> {
39    let mut encoder = Encoder::new_vec(output)?;
40    encoder.write_node(node)?;
41    Ok(())
42}
43
44pub fn marshal(node: &Node) -> Result<Vec<u8>> {
45    let mut payload = Vec::with_capacity(DEFAULT_MARSHAL_CAPACITY);
46    marshal_to_vec(node, &mut payload)?;
47    Ok(payload)
48}
49
50/// Serialize a `Node` using a conservative auto strategy.
51///
52/// This keeps the fast one-pass path for typical payloads and only uses
53/// a lightweight preallocation hint for obviously larger payload shapes.
54pub fn marshal_auto(node: &Node) -> Result<Vec<u8>> {
55    if should_auto_reserve_node(node) {
56        marshal_with_capacity(node, estimate_capacity_node(node))
57    } else {
58        marshal(node)
59    }
60}
61
62/// Serialize a `Node` using a two-pass strategy:
63/// 1) compute exact encoded size
64/// 2) write directly into a fixed-size output buffer
65///
66/// This avoids output buffer growth/copies and can be beneficial for large/variable payloads.
67pub fn marshal_exact(node: &Node) -> Result<Vec<u8>> {
68    let plan = build_marshaled_node_plan(node);
69    let mut payload = vec![0; plan.size];
70    let mut encoder = Encoder::new_slice(payload.as_mut_slice(), Some(&plan.hints))?;
71    encoder.write_node(node)?;
72    let written = encoder.bytes_written();
73    debug_assert_eq!(written, payload.len(), "plan size mismatch for Node");
74    payload.truncate(written);
75    Ok(payload)
76}
77
78/// Zero-copy serialization of a `NodeRef` directly into a writer.
79/// This avoids the allocation overhead of converting to an owned `Node` first.
80pub fn marshal_ref_to(node: &NodeRef<'_>, writer: &mut impl Write) -> Result<()> {
81    let mut encoder = Encoder::new(writer)?;
82    encoder.write_node(node)?;
83    Ok(())
84}
85
86/// Serialize a borrowed node directly into a `Vec<u8>` using the fast vec writer path.
87pub fn marshal_ref_to_vec(node: &NodeRef<'_>, output: &mut Vec<u8>) -> Result<()> {
88    let mut encoder = Encoder::new_vec(output)?;
89    encoder.write_node(node)?;
90    Ok(())
91}
92
93/// Zero-copy serialization of a `NodeRef` to a new `Vec<u8>`.
94/// Prefer `marshal_ref_to` with a reusable buffer for best performance.
95pub fn marshal_ref(node: &NodeRef<'_>) -> Result<Vec<u8>> {
96    let mut payload = Vec::with_capacity(DEFAULT_MARSHAL_CAPACITY);
97    marshal_ref_to_vec(node, &mut payload)?;
98    Ok(payload)
99}
100
101/// Serialize a `NodeRef` using the same conservative auto strategy as `marshal_auto`.
102pub fn marshal_ref_auto(node: &NodeRef<'_>) -> Result<Vec<u8>> {
103    if should_auto_reserve_node_ref(node) {
104        marshal_ref_with_capacity(node, estimate_capacity_node_ref(node))
105    } else {
106        marshal_ref(node)
107    }
108}
109
110/// Serialize a `NodeRef` using a two-pass exact-size strategy.
111///
112/// This avoids output buffer growth/copies and preserves zero-copy input semantics.
113pub fn marshal_ref_exact(node: &NodeRef<'_>) -> Result<Vec<u8>> {
114    let plan = build_marshaled_node_ref_plan(node);
115    let mut payload = vec![0; plan.size];
116    let mut encoder = Encoder::new_slice(payload.as_mut_slice(), Some(&plan.hints))?;
117    encoder.write_node(node)?;
118    let written = encoder.bytes_written();
119    debug_assert_eq!(written, payload.len(), "plan size mismatch for NodeRef");
120    payload.truncate(written);
121    Ok(payload)
122}
123
124#[inline]
125fn marshal_with_capacity(node: &Node, capacity: usize) -> Result<Vec<u8>> {
126    let mut payload = Vec::with_capacity(capacity);
127    marshal_to_vec(node, &mut payload)?;
128    Ok(payload)
129}
130
131#[inline]
132fn marshal_ref_with_capacity(node: &NodeRef<'_>, capacity: usize) -> Result<Vec<u8>> {
133    let mut payload = Vec::with_capacity(capacity);
134    marshal_ref_to_vec(node, &mut payload)?;
135    Ok(payload)
136}
137
138#[inline]
139fn should_auto_reserve_node(node: &Node) -> bool {
140    if node.attrs.len() >= AUTO_RESERVE_ATTRS_THRESHOLD {
141        return true;
142    }
143
144    match &node.content {
145        Some(NodeContent::Bytes(bytes)) => bytes.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
146        Some(NodeContent::String(text)) => text.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
147        Some(NodeContent::Nodes(children)) => {
148            if children.len() >= AUTO_RESERVE_CHILDREN_THRESHOLD {
149                return true;
150            }
151            // Check one level deeper for large nested lists (e.g., <iq> -> <list> -> 812 keys)
152            children.iter().any(|child| {
153                matches!(&child.content, Some(NodeContent::Nodes(gc)) if gc.len() >= AUTO_RESERVE_CHILDREN_THRESHOLD)
154            })
155        }
156        None => false,
157    }
158}
159
160#[inline]
161fn should_auto_reserve_node_ref(node: &NodeRef<'_>) -> bool {
162    if node.attrs.len() >= AUTO_RESERVE_ATTRS_THRESHOLD {
163        return true;
164    }
165
166    match node.content.as_deref() {
167        Some(NodeContentRef::Bytes(bytes)) => bytes.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
168        Some(NodeContentRef::String(text)) => text.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
169        Some(NodeContentRef::Nodes(children)) => {
170            if children.len() >= AUTO_RESERVE_CHILDREN_THRESHOLD {
171                return true;
172            }
173            // Check one level deeper for large nested lists (e.g., <iq> -> <list> -> 812 keys)
174            children.iter().any(|child| {
175                matches!(child.content.as_deref(), Some(NodeContentRef::Nodes(gc)) if gc.len() >= AUTO_RESERVE_CHILDREN_THRESHOLD)
176            })
177        }
178        None => false,
179    }
180}
181
182#[inline]
183fn estimate_capacity_node(node: &Node) -> usize {
184    let mut estimate = DEFAULT_MARSHAL_CAPACITY + 16;
185    estimate += node.tag.len();
186    estimate += node.attrs.len() * AUTO_ATTR_ESTIMATE;
187
188    match &node.content {
189        Some(NodeContent::Bytes(bytes)) => {
190            estimate += bytes.len() + 8;
191        }
192        Some(NodeContent::String(text)) => {
193            estimate += text.len() + 8;
194        }
195        Some(NodeContent::Nodes(children)) => {
196            estimate += children.len() * AUTO_CHILD_ESTIMATE;
197            for child in children.iter().take(AUTO_CHILD_SAMPLE_LIMIT) {
198                estimate += child.tag.len() + child.attrs.len() * AUTO_ATTR_ESTIMATE;
199                match &child.content {
200                    Some(NodeContent::Bytes(bytes)) => estimate += bytes.len() + 8,
201                    Some(NodeContent::String(text)) => estimate += text.len() + 8,
202                    Some(NodeContent::Nodes(grand_children)) => {
203                        estimate += grand_children.len() * AUTO_GRANDCHILD_ESTIMATE;
204                    }
205                    None => {}
206                }
207                if estimate >= AUTO_MAX_HINT_CAPACITY {
208                    return AUTO_MAX_HINT_CAPACITY;
209                }
210            }
211        }
212        None => {}
213    }
214
215    estimate.clamp(DEFAULT_MARSHAL_CAPACITY, AUTO_MAX_HINT_CAPACITY)
216}
217
218#[inline]
219fn estimate_capacity_node_ref(node: &NodeRef<'_>) -> usize {
220    let mut estimate = DEFAULT_MARSHAL_CAPACITY + 16;
221    estimate += node.tag.len();
222    estimate += node.attrs.len() * AUTO_ATTR_ESTIMATE;
223
224    match node.content.as_deref() {
225        Some(NodeContentRef::Bytes(bytes)) => {
226            estimate += bytes.len() + 8;
227        }
228        Some(NodeContentRef::String(text)) => {
229            estimate += text.len() + 8;
230        }
231        Some(NodeContentRef::Nodes(children)) => {
232            estimate += children.len() * AUTO_CHILD_ESTIMATE;
233            for child in children.iter().take(AUTO_CHILD_SAMPLE_LIMIT) {
234                estimate += child.tag.len() + child.attrs.len() * AUTO_ATTR_ESTIMATE;
235                match child.content.as_deref() {
236                    Some(NodeContentRef::Bytes(bytes)) => estimate += bytes.len() + 8,
237                    Some(NodeContentRef::String(text)) => estimate += text.len() + 8,
238                    Some(NodeContentRef::Nodes(grand_children)) => {
239                        estimate += grand_children.len() * AUTO_GRANDCHILD_ESTIMATE;
240                    }
241                    None => {}
242                }
243                if estimate >= AUTO_MAX_HINT_CAPACITY {
244                    return AUTO_MAX_HINT_CAPACITY;
245                }
246            }
247        }
248        None => {}
249    }
250
251    estimate.clamp(DEFAULT_MARSHAL_CAPACITY, AUTO_MAX_HINT_CAPACITY)
252}
253
#[cfg(test)]
mod tests {
    use super::*;
    use crate::jid::Jid;
    use crate::node::{Attrs, NodeContent, NodeValue};

    type TestResult = crate::error::Result<()>;

    /// Reference encoding of an owned node through the generic `Write` path.
    fn encode_via_writer(node: &Node) -> Result<Vec<u8>> {
        let mut sink = Vec::new();
        marshal_to(node, &mut sink)?;
        Ok(sink)
    }

    /// Reference encoding of a borrowed node through the generic `Write` path.
    fn encode_ref_via_writer(node: &NodeRef<'_>) -> Result<Vec<u8>> {
        let mut sink = Vec::new();
        marshal_ref_to(node, &mut sink)?;
        Ok(sink)
    }

    /// Small message node with mixed attribute kinds and two children
    /// (one bytes payload, one string payload).
    fn fixture_node() -> Node {
        let mut attrs = Attrs::with_capacity(4);
        attrs.push("id".to_string(), "ABC123");
        attrs.push("to".to_string(), "123456789@s.whatsapp.net");
        attrs.push(
            "participant".to_string(),
            NodeValue::Jid("15551234567@s.whatsapp.net".parse::<Jid>().unwrap()),
        );
        attrs.push("hex".to_string(), "DEADBEEF");

        let bytes_child = Node::new(
            "item",
            Attrs::new(),
            Some(NodeContent::Bytes(vec![1, 2, 3, 4, 5, 6, 7, 8])),
        );
        let text_child = Node::new(
            "text",
            Attrs::new(),
            Some(NodeContent::String("hello".repeat(40).into())),
        );

        Node::new(
            "message",
            attrs,
            Some(NodeContent::Nodes(vec![bytes_child, text_child])),
        )
    }

    /// Node whose bytes payload exceeds the scalar auto-reserve threshold.
    fn large_binary_fixture() -> Node {
        let blob = vec![0xAB; AUTO_RESERVE_SCALAR_THRESHOLD + 2048];
        Node::new("message", Attrs::new(), Some(NodeContent::Bytes(blob)))
    }

    #[test]
    fn test_marshaled_node_size_matches_output() -> TestResult {
        let node = fixture_node();
        let planned = build_marshaled_node_plan(&node).size;
        let encoded = marshal(&node)?;
        assert_eq!(encoded.len(), planned);
        Ok(())
    }

    #[test]
    fn test_marshaled_node_ref_size_matches_output() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();
        let planned = build_marshaled_node_ref_plan(&node_ref).size;
        let encoded = marshal_ref(&node_ref)?;
        assert_eq!(encoded.len(), planned);
        Ok(())
    }

    #[test]
    fn test_marshal_matches_marshal_to_bytes() -> TestResult {
        let node = fixture_node();

        let allocated = marshal(&node)?;
        let reference = encode_via_writer(&node)?;

        assert_eq!(allocated, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_matches_marshal_ref_to_bytes() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();

        let allocated = marshal_ref(&node_ref)?;
        let reference = encode_ref_via_writer(&node_ref)?;

        assert_eq!(allocated, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_to_vec_matches_marshal_to() -> TestResult {
        let node = fixture_node();

        let mut via_vec = Vec::new();
        marshal_to_vec(&node, &mut via_vec)?;
        let reference = encode_via_writer(&node)?;

        assert_eq!(via_vec, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_to_vec_matches_marshal_ref_to() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();

        let mut via_vec = Vec::new();
        marshal_ref_to_vec(&node_ref, &mut via_vec)?;
        let reference = encode_ref_via_writer(&node_ref)?;

        assert_eq!(via_vec, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_exact_matches_marshal_to_bytes() -> TestResult {
        let node = fixture_node();

        let exact = marshal_exact(&node)?;
        let reference = encode_via_writer(&node)?;

        assert_eq!(exact, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_exact_matches_marshal_ref_to_bytes() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();

        let exact = marshal_ref_exact(&node_ref)?;
        let reference = encode_ref_via_writer(&node_ref)?;

        assert_eq!(exact, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_auto_matches_marshal_to_bytes() -> TestResult {
        let node = fixture_node();

        let auto = marshal_auto(&node)?;
        let reference = encode_via_writer(&node)?;

        assert_eq!(auto, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_auto_matches_marshal_ref_to_bytes() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();

        let auto = marshal_ref_auto(&node_ref)?;
        let reference = encode_ref_via_writer(&node_ref)?;

        assert_eq!(auto, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_auto_large_binary_matches_marshal_to_bytes() -> TestResult {
        let node = large_binary_fixture();

        let auto = marshal_auto(&node)?;
        let reference = encode_via_writer(&node)?;

        assert_eq!(auto, reference);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_auto_large_binary_matches_marshal_ref_to_bytes() -> TestResult {
        let node = large_binary_fixture();
        let node_ref = node.as_node_ref();

        let auto = marshal_ref_auto(&node_ref)?;
        let reference = encode_ref_via_writer(&node_ref)?;

        assert_eq!(auto, reference);
        Ok(())
    }
}
454}