Skip to main content

wa_rs_binary/
marshal.rs

1use std::io::Write;
2
3use crate::{
4    BinaryError, Node, NodeRef, Result,
5    decoder::Decoder,
6    encoder::{Encoder, build_marshaled_node_plan, build_marshaled_node_ref_plan},
7    node::{NodeContent, NodeContentRef},
8};
9
/// Initial `Vec` capacity used by `marshal` / `marshal_ref`; also the base term
/// and the lower clamp bound of the auto-reserve estimate.
const DEFAULT_MARSHAL_CAPACITY: usize = 1 << 10;

// Thresholds that switch the `*_auto` functions onto the pre-reserving path.

/// Root attribute count at or above which a capacity hint is computed.
const AUTO_RESERVE_ATTRS_THRESHOLD: usize = 24;
/// Direct child count at or above which a capacity hint is computed.
const AUTO_RESERVE_CHILDREN_THRESHOLD: usize = 64;
/// Length (8 KiB) of a bytes/string payload at or above which a capacity hint is computed.
const AUTO_RESERVE_SCALAR_THRESHOLD: usize = 8 << 10;

// Inputs to the capacity estimate itself.

/// Maximum number of direct children inspected individually while estimating.
const AUTO_CHILD_SAMPLE_LIMIT: usize = 32;
/// Upper bound (512 KiB) on any capacity hint produced by the estimate.
const AUTO_MAX_HINT_CAPACITY: usize = 512 << 10;
/// Bytes budgeted per attribute.
const AUTO_ATTR_ESTIMATE: usize = 24;
/// Bytes budgeted per direct child.
const AUTO_CHILD_ESTIMATE: usize = 96;
/// Bytes budgeted per grandchild of a sampled child.
const AUTO_GRANDCHILD_ESTIMATE: usize = 40;
19
20pub fn unmarshal_ref(data: &[u8]) -> Result<NodeRef<'_>> {
21    let mut decoder = Decoder::new(data);
22    let node = decoder.read_node_ref()?;
23
24    if decoder.is_finished() {
25        Ok(node)
26    } else {
27        Err(BinaryError::LeftoverData(decoder.bytes_left()))
28    }
29}
30
31pub fn marshal_to(node: &Node, writer: &mut impl Write) -> Result<()> {
32    let mut encoder = Encoder::new(writer)?;
33    encoder.write_node(node)?;
34    Ok(())
35}
36
37/// Serialize an owned node directly into a `Vec<u8>` using the fast vec writer path.
38pub fn marshal_to_vec(node: &Node, output: &mut Vec<u8>) -> Result<()> {
39    let mut encoder = Encoder::new_vec(output)?;
40    encoder.write_node(node)?;
41    Ok(())
42}
43
44pub fn marshal(node: &Node) -> Result<Vec<u8>> {
45    let mut payload = Vec::with_capacity(DEFAULT_MARSHAL_CAPACITY);
46    marshal_to_vec(node, &mut payload)?;
47    Ok(payload)
48}
49
50/// Serialize a `Node` using a conservative auto strategy.
51///
52/// This keeps the fast one-pass path for typical payloads and only uses
53/// a lightweight preallocation hint for obviously larger payload shapes.
54pub fn marshal_auto(node: &Node) -> Result<Vec<u8>> {
55    if should_auto_reserve_node(node) {
56        marshal_with_capacity(node, estimate_capacity_node(node))
57    } else {
58        marshal(node)
59    }
60}
61
62/// Serialize a `Node` using a two-pass strategy:
63/// 1) compute exact encoded size
64/// 2) write directly into a fixed-size output buffer
65///
66/// This avoids output buffer growth/copies and can be beneficial for large/variable payloads.
67pub fn marshal_exact(node: &Node) -> Result<Vec<u8>> {
68    let plan = build_marshaled_node_plan(node);
69    let mut payload = vec![0; plan.size];
70    let mut encoder = Encoder::new_slice(payload.as_mut_slice(), Some(&plan.hints))?;
71    encoder.write_node(node)?;
72    let written = encoder.bytes_written();
73    debug_assert_eq!(written, payload.len(), "plan size mismatch for Node");
74    payload.truncate(written);
75    Ok(payload)
76}
77
78/// Zero-copy serialization of a `NodeRef` directly into a writer.
79/// This avoids the allocation overhead of converting to an owned `Node` first.
80pub fn marshal_ref_to(node: &NodeRef<'_>, writer: &mut impl Write) -> Result<()> {
81    let mut encoder = Encoder::new(writer)?;
82    encoder.write_node(node)?;
83    Ok(())
84}
85
86/// Serialize a borrowed node directly into a `Vec<u8>` using the fast vec writer path.
87pub fn marshal_ref_to_vec(node: &NodeRef<'_>, output: &mut Vec<u8>) -> Result<()> {
88    let mut encoder = Encoder::new_vec(output)?;
89    encoder.write_node(node)?;
90    Ok(())
91}
92
93/// Zero-copy serialization of a `NodeRef` to a new `Vec<u8>`.
94/// Prefer `marshal_ref_to` with a reusable buffer for best performance.
95pub fn marshal_ref(node: &NodeRef<'_>) -> Result<Vec<u8>> {
96    let mut payload = Vec::with_capacity(DEFAULT_MARSHAL_CAPACITY);
97    marshal_ref_to_vec(node, &mut payload)?;
98    Ok(payload)
99}
100
101/// Serialize a `NodeRef` using the same conservative auto strategy as `marshal_auto`.
102pub fn marshal_ref_auto(node: &NodeRef<'_>) -> Result<Vec<u8>> {
103    if should_auto_reserve_node_ref(node) {
104        marshal_ref_with_capacity(node, estimate_capacity_node_ref(node))
105    } else {
106        marshal_ref(node)
107    }
108}
109
110/// Serialize a `NodeRef` using a two-pass exact-size strategy.
111///
112/// This avoids output buffer growth/copies and preserves zero-copy input semantics.
113pub fn marshal_ref_exact(node: &NodeRef<'_>) -> Result<Vec<u8>> {
114    let plan = build_marshaled_node_ref_plan(node);
115    let mut payload = vec![0; plan.size];
116    let mut encoder = Encoder::new_slice(payload.as_mut_slice(), Some(&plan.hints))?;
117    encoder.write_node(node)?;
118    let written = encoder.bytes_written();
119    debug_assert_eq!(written, payload.len(), "plan size mismatch for NodeRef");
120    payload.truncate(written);
121    Ok(payload)
122}
123
124#[inline]
125fn marshal_with_capacity(node: &Node, capacity: usize) -> Result<Vec<u8>> {
126    let mut payload = Vec::with_capacity(capacity);
127    marshal_to_vec(node, &mut payload)?;
128    Ok(payload)
129}
130
131#[inline]
132fn marshal_ref_with_capacity(node: &NodeRef<'_>, capacity: usize) -> Result<Vec<u8>> {
133    let mut payload = Vec::with_capacity(capacity);
134    marshal_ref_to_vec(node, &mut payload)?;
135    Ok(payload)
136}
137
138#[inline]
139fn should_auto_reserve_node(node: &Node) -> bool {
140    if node.attrs.len() >= AUTO_RESERVE_ATTRS_THRESHOLD {
141        return true;
142    }
143
144    match &node.content {
145        Some(NodeContent::Bytes(bytes)) => bytes.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
146        Some(NodeContent::String(text)) => text.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
147        Some(NodeContent::Nodes(children)) => children.len() >= AUTO_RESERVE_CHILDREN_THRESHOLD,
148        None => false,
149    }
150}
151
152#[inline]
153fn should_auto_reserve_node_ref(node: &NodeRef<'_>) -> bool {
154    if node.attrs.len() >= AUTO_RESERVE_ATTRS_THRESHOLD {
155        return true;
156    }
157
158    match node.content.as_deref() {
159        Some(NodeContentRef::Bytes(bytes)) => bytes.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
160        Some(NodeContentRef::String(text)) => text.len() >= AUTO_RESERVE_SCALAR_THRESHOLD,
161        Some(NodeContentRef::Nodes(children)) => children.len() >= AUTO_RESERVE_CHILDREN_THRESHOLD,
162        None => false,
163    }
164}
165
166#[inline]
167fn estimate_capacity_node(node: &Node) -> usize {
168    let mut estimate = DEFAULT_MARSHAL_CAPACITY + 16;
169    estimate += node.tag.len();
170    estimate += node.attrs.len() * AUTO_ATTR_ESTIMATE;
171
172    match &node.content {
173        Some(NodeContent::Bytes(bytes)) => {
174            estimate += bytes.len() + 8;
175        }
176        Some(NodeContent::String(text)) => {
177            estimate += text.len() + 8;
178        }
179        Some(NodeContent::Nodes(children)) => {
180            estimate += children.len() * AUTO_CHILD_ESTIMATE;
181            for child in children.iter().take(AUTO_CHILD_SAMPLE_LIMIT) {
182                estimate += child.tag.len() + child.attrs.len() * AUTO_ATTR_ESTIMATE;
183                match &child.content {
184                    Some(NodeContent::Bytes(bytes)) => estimate += bytes.len() + 8,
185                    Some(NodeContent::String(text)) => estimate += text.len() + 8,
186                    Some(NodeContent::Nodes(grand_children)) => {
187                        estimate += grand_children.len() * AUTO_GRANDCHILD_ESTIMATE;
188                    }
189                    None => {}
190                }
191                if estimate >= AUTO_MAX_HINT_CAPACITY {
192                    return AUTO_MAX_HINT_CAPACITY;
193                }
194            }
195        }
196        None => {}
197    }
198
199    estimate.clamp(DEFAULT_MARSHAL_CAPACITY, AUTO_MAX_HINT_CAPACITY)
200}
201
202#[inline]
203fn estimate_capacity_node_ref(node: &NodeRef<'_>) -> usize {
204    let mut estimate = DEFAULT_MARSHAL_CAPACITY + 16;
205    estimate += node.tag.len();
206    estimate += node.attrs.len() * AUTO_ATTR_ESTIMATE;
207
208    match node.content.as_deref() {
209        Some(NodeContentRef::Bytes(bytes)) => {
210            estimate += bytes.len() + 8;
211        }
212        Some(NodeContentRef::String(text)) => {
213            estimate += text.len() + 8;
214        }
215        Some(NodeContentRef::Nodes(children)) => {
216            estimate += children.len() * AUTO_CHILD_ESTIMATE;
217            for child in children.iter().take(AUTO_CHILD_SAMPLE_LIMIT) {
218                estimate += child.tag.len() + child.attrs.len() * AUTO_ATTR_ESTIMATE;
219                match child.content.as_deref() {
220                    Some(NodeContentRef::Bytes(bytes)) => estimate += bytes.len() + 8,
221                    Some(NodeContentRef::String(text)) => estimate += text.len() + 8,
222                    Some(NodeContentRef::Nodes(grand_children)) => {
223                        estimate += grand_children.len() * AUTO_GRANDCHILD_ESTIMATE;
224                    }
225                    None => {}
226                }
227                if estimate >= AUTO_MAX_HINT_CAPACITY {
228                    return AUTO_MAX_HINT_CAPACITY;
229                }
230            }
231        }
232        None => {}
233    }
234
235    estimate.clamp(DEFAULT_MARSHAL_CAPACITY, AUTO_MAX_HINT_CAPACITY)
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use crate::jid::Jid;
    use crate::node::{Attrs, NodeContent, NodeValue};

    type TestResult = crate::error::Result<()>;
    type Encoded = crate::error::Result<Vec<u8>>;

    /// A small `message` node: four attributes (one of them JID-valued) and two
    /// children, one carrying 8 bytes and one carrying a 200-byte string.
    fn fixture_node() -> Node {
        let participant: Jid = "15551234567@s.whatsapp.net".parse().unwrap();

        let mut attrs = Attrs::with_capacity(4);
        attrs.push(String::from("id"), "ABC123");
        attrs.push(String::from("to"), "123456789@s.whatsapp.net");
        attrs.push(String::from("participant"), NodeValue::Jid(participant));
        attrs.push(String::from("hex"), "DEADBEEF");

        let item = Node::new(
            "item",
            Attrs::new(),
            Some(NodeContent::Bytes(vec![1, 2, 3, 4, 5, 6, 7, 8])),
        );
        let text = Node::new(
            "text",
            Attrs::new(),
            Some(NodeContent::String("hello".repeat(40))),
        );

        Node::new("message", attrs, Some(NodeContent::Nodes(vec![item, text])))
    }

    /// A node whose byte payload is 2 KiB above the scalar auto-reserve
    /// threshold, so the `*_auto` functions take the pre-reserving path.
    fn large_binary_fixture() -> Node {
        let payload = vec![0xAB; AUTO_RESERVE_SCALAR_THRESHOLD + 2048];
        Node::new("message", Attrs::new(), Some(NodeContent::Bytes(payload)))
    }

    /// Reference encoding of an owned node through the generic writer path.
    fn encode_via_writer(node: &Node) -> Encoded {
        let mut sink = Vec::new();
        marshal_to(node, &mut sink)?;
        Ok(sink)
    }

    /// Reference encoding of a borrowed node through the generic writer path.
    fn encode_ref_via_writer(node_ref: &NodeRef<'_>) -> Encoded {
        let mut sink = Vec::new();
        marshal_ref_to(node_ref, &mut sink)?;
        Ok(sink)
    }

    #[test]
    fn test_marshaled_node_size_matches_output() -> TestResult {
        let node = fixture_node();
        let planned = build_marshaled_node_plan(&node).size;
        assert_eq!(marshal(&node)?.len(), planned);
        Ok(())
    }

    #[test]
    fn test_marshaled_node_ref_size_matches_output() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();
        let planned = build_marshaled_node_ref_plan(&node_ref).size;
        assert_eq!(marshal_ref(&node_ref)?.len(), planned);
        Ok(())
    }

    #[test]
    fn test_marshal_matches_marshal_to_bytes() -> TestResult {
        let node = fixture_node();
        let allocated = marshal(&node)?;
        assert_eq!(allocated, encode_via_writer(&node)?);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_matches_marshal_ref_to_bytes() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();
        let allocated = marshal_ref(&node_ref)?;
        assert_eq!(allocated, encode_ref_via_writer(&node_ref)?);
        Ok(())
    }

    #[test]
    fn test_marshal_to_vec_matches_marshal_to() -> TestResult {
        let node = fixture_node();

        let mut via_vec = Vec::new();
        marshal_to_vec(&node, &mut via_vec)?;

        assert_eq!(via_vec, encode_via_writer(&node)?);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_to_vec_matches_marshal_ref_to() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();

        let mut via_vec = Vec::new();
        marshal_ref_to_vec(&node_ref, &mut via_vec)?;

        assert_eq!(via_vec, encode_ref_via_writer(&node_ref)?);
        Ok(())
    }

    #[test]
    fn test_marshal_exact_matches_marshal_to_bytes() -> TestResult {
        let node = fixture_node();
        let exact = marshal_exact(&node)?;
        assert_eq!(exact, encode_via_writer(&node)?);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_exact_matches_marshal_ref_to_bytes() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();
        let exact = marshal_ref_exact(&node_ref)?;
        assert_eq!(exact, encode_ref_via_writer(&node_ref)?);
        Ok(())
    }

    #[test]
    fn test_marshal_auto_matches_marshal_to_bytes() -> TestResult {
        let node = fixture_node();
        let auto = marshal_auto(&node)?;
        assert_eq!(auto, encode_via_writer(&node)?);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_auto_matches_marshal_ref_to_bytes() -> TestResult {
        let node = fixture_node();
        let node_ref = node.as_node_ref();
        let auto = marshal_ref_auto(&node_ref)?;
        assert_eq!(auto, encode_ref_via_writer(&node_ref)?);
        Ok(())
    }

    #[test]
    fn test_marshal_auto_large_binary_matches_marshal_to_bytes() -> TestResult {
        let node = large_binary_fixture();
        let auto = marshal_auto(&node)?;
        assert_eq!(auto, encode_via_writer(&node)?);
        Ok(())
    }

    #[test]
    fn test_marshal_ref_auto_large_binary_matches_marshal_ref_to_bytes() -> TestResult {
        let node = large_binary_fixture();
        let node_ref = node.as_node_ref();
        let auto = marshal_ref_auto(&node_ref)?;
        assert_eq!(auto, encode_ref_via_writer(&node_ref)?);
        Ok(())
    }
}
438}