nym_sphinx_chunking/
lib.rs

1// Copyright 2021 - Nym Technologies SA <contact@nymtech.net>
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::fragment::{linked_fragment_payload_max_len, unlinked_fragment_payload_max_len};
5use fragment::FragmentHeader;
6use nym_crypto::asymmetric::ed25519::PublicKey;
7use serde::Serialize;
8pub use set::split_into_sets;
9use thiserror::Error;
10use utoipa::ToSchema;
11
12pub const MIN_PADDING_OVERHEAD: usize = 1;
13
14// Future consideration: currently in a lot of places, the payloads have randomised content
15// which is not a perfect testing strategy as it might not detect some edge cases I never would
16// have assumed could be possible. A better approach would be to research some Fuzz testing
17// library like: https://github.com/rust-fuzz/afl.rs and use that instead for the inputs.
18
// perhaps it might be useful down the line for interaction testing between clients, mixes, etc.?

// TODO: this module has evolved significantly since the tests were first written;
// they should definitely be revisited.
// For instance, there are no tests for the cases when we are padding the message.
25pub mod fragment;
26pub mod reconstruction;
27pub mod set;
28
29pub mod monitoring {
30    use crate::fragment::Fragment;
31    use crate::{ReceivedFragment, SentFragment};
32    use dashmap::DashMap;
33    use nym_crypto::asymmetric::ed25519::PublicKey;
34    use std::sync::LazyLock;
35    use std::sync::atomic::{AtomicBool, Ordering};
36
37    pub static ENABLED: AtomicBool = AtomicBool::new(false);
38
39    pub static FRAGMENTS_RECEIVED: LazyLock<DashMap<i32, Vec<ReceivedFragment>>> =
40        LazyLock::new(DashMap::new);
41
42    pub static FRAGMENTS_SENT: LazyLock<DashMap<i32, Vec<SentFragment>>> =
43        LazyLock::new(DashMap::new);
44
45    pub fn enable() {
46        ENABLED.store(true, Ordering::Relaxed)
47    }
48
49    pub fn enabled() -> bool {
50        ENABLED.load(Ordering::Relaxed)
51    }
52
53    #[macro_export]
54    macro_rules! now {
55        () => {
56            match std::time::SystemTime::now().duration_since(std::time::SystemTime::UNIX_EPOCH) {
57                Ok(n) => n.as_secs(),
58                Err(_) => 0,
59            }
60        };
61    }
62
63    pub fn fragment_received(fragment: &Fragment) {
64        if enabled() {
65            let id = fragment.fragment_identifier().set_id();
66            let mut entry = FRAGMENTS_RECEIVED.entry(id).or_default();
67            let r = ReceivedFragment::new(fragment.header(), now!());
68            entry.push(r);
69        }
70    }
71
72    pub fn fragment_sent(fragment: &Fragment, client_nonce: i32, destination: PublicKey) {
73        if enabled() {
74            let id = fragment.fragment_identifier().set_id();
75            let mut entry = FRAGMENTS_SENT.entry(id).or_default();
76            let s = SentFragment::new(fragment.header(), now!(), client_nonce, destination);
77            entry.push(s);
78        }
79    }
80}
81
/// Mixnet-related metadata attached to a sent fragment.
#[derive(Debug, Clone)]
pub struct FragmentMixParams {
    // ed25519 public key of the node/client the fragment was addressed to
    destination: PublicKey,
}

impl FragmentMixParams {
    /// Returns the destination public key of this fragment.
    pub fn destination(&self) -> PublicKey {
        self.destination
    }
}
92
/// Monitoring record of a single fragment sent into the mixnet.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct SentFragment {
    // header of the fragment that was sent
    header: FragmentHeader,
    // unix timestamp (seconds) at which it was sent
    at: u64,
    // per-client nonce; combined with the header seed in `seed()`
    client_nonce: i32,
    // not serialized: carries the destination public key
    #[serde(skip)]
    mixnet_params: FragmentMixParams,
}
101
102impl SentFragment {
103    fn new(header: FragmentHeader, at: u64, client_nonce: i32, destination: PublicKey) -> Self {
104        let mixnet_params = FragmentMixParams { destination };
105        SentFragment {
106            header,
107            at,
108            client_nonce,
109            mixnet_params,
110        }
111    }
112
113    pub fn header(&self) -> FragmentHeader {
114        self.header.clone()
115    }
116
117    pub fn at(&self) -> u64 {
118        self.at
119    }
120
121    pub fn client_nonce(&self) -> i32 {
122        self.client_nonce
123    }
124
125    pub fn seed(&self) -> i32 {
126        self.header().seed().wrapping_mul(self.client_nonce())
127    }
128
129    pub fn mixnet_params(&self) -> FragmentMixParams {
130        self.mixnet_params.clone()
131    }
132}
133
/// Monitoring record of a single fragment received from the mixnet.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct ReceivedFragment {
    // header of the fragment that was received
    header: FragmentHeader,
    // unix timestamp (seconds) at which it was received
    at: u64,
}
139
140impl ReceivedFragment {
141    fn new(header: FragmentHeader, at: u64) -> Self {
142        ReceivedFragment { header, at }
143    }
144
145    pub fn header(&self) -> FragmentHeader {
146        self.header.clone()
147    }
148
149    pub fn at(&self) -> u64 {
150        self.at
151    }
152}
153
/// The idea behind the process of chunking is to incur as little data overhead as possible due
/// to the very computationally costly sphinx encapsulation procedure.
///
/// To achieve this, the underlying message is split into so-called "sets", which are further
/// subdivided into the base unit of "fragment" that is directly encapsulated by a Sphinx packet.
/// This makes it possible to encapsulate messages of arbitrary length.
///
/// Each message, regardless of its size, consists of at least a single `Set` that has at least
/// a single `Fragment`.
///
/// Each `Fragment` can have a variable, yet fully deterministic, length,
/// that depends on its position in the set as well as the total number of sets. This is further
/// explained in the `fragment.rs` file.
///
/// Similarly, each `Set` can have a variable number of `Fragment`s inside. However, that
/// value is more restrictive: if it's the last set into which the message was split
/// (or implicitly the only one), it has no lower bound on the number of `Fragment`s
/// (apart from the restriction of containing at least a single one). If the set is located
/// somewhere in the middle, *it must be* full. Finally, regardless of its position, it must also be
/// true that it contains no more than `u8::MAX`, i.e. 255 `Fragment`s.
/// Again, the reasoning for this is further explained in the `set.rs` file. However, you might
/// also want to look at `fragment.rs` to understand the full context behind that design choice.
///
/// Both of those concepts as well as their structures, i.e. `Set` and `Fragment`,
/// are further explained in the respective files.
179
180#[derive(PartialEq, Eq, Debug, Error)]
181pub enum ChunkingError {
182    #[error("Received payload is too long. Got {received}, expected {expected}")]
183    InvalidPayloadLengthError { received: usize, expected: usize },
184
185    #[error("Received payload is too long. Got {received}, expected at most {expected_at_most}")]
186    TooLongPayloadLengthError {
187        received: usize,
188        expected_at_most: usize,
189    },
190
191    // this should really be split into multiple variants to provide better error information
192    #[error("Provided header was malformed or contained self-contradicting fields")]
193    MalformedHeaderError,
194
195    #[error(
196        "Received too few bytes to deserialize fragment header. Got {received}, expected {expected}"
197    )]
198    TooShortFragmentHeader { received: usize, expected: usize },
199
200    #[error("Received fragment identifier ({received}) is not a valid value!")]
201    MalformedFragmentIdentifier { received: i32 },
202}
203
/// Returns the number of fragments the message will be split into as well as the number
/// of spare (padding) bytes available in the final fragment.
///
/// `plaintext_per_fragment` is the plaintext capacity of a single packet, from which
/// the linked/unlinked fragment payload capacities are derived.
pub fn number_of_required_fragments(
    message_len: usize,
    plaintext_per_fragment: usize,
) -> (usize, usize) {
    // payload capacities of fragments without / with a link to a neighbouring set
    let max_unlinked = unlinked_fragment_payload_max_len(plaintext_per_fragment);
    let max_linked = linked_fragment_payload_max_len(plaintext_per_fragment);

    match set::total_number_of_sets(message_len, plaintext_per_fragment) {
        1 => {
            // check if it's a single (non-full) fragment message
            if message_len < max_unlinked {
                return (1, max_unlinked - message_len);
            }

            // all fragments will be 'unlinked'
            let quot = message_len / max_unlinked;
            let rem = message_len % max_unlinked;

            if rem == 0 {
                (quot, 0)
            } else {
                (quot + 1, max_unlinked - rem)
            }
        }

        n => {
            // in the first and last set there will be one 'linked' fragment each,
            // and two 'linked' fragments in every other set, meaning
            // there will be 2 * (n - 2) + 2 = 2n - 2 'linked' fragments total;
            // the rest will be 'unlinked'

            // we know for sure that all fragments in all but the last set are definitely full
            // (the last set starts with a single 'linked' fragment)
            let without_last = (n - 1) * (u8::MAX as usize);
            let linked_fragments_without_last = (2 * n - 2) - 1;
            let unlinked_fragments_without_last = without_last - linked_fragments_without_last;

            // number of message bytes that spill over into the final set
            let final_set_message_len = message_len
                - linked_fragments_without_last * max_linked
                - unlinked_fragments_without_last * max_unlinked;

            // we must be careful with the last set as it might be the case that it only
            // consists of a single, linked, non-full fragment
            match final_set_message_len {
                n if n < max_linked => (without_last + 1, max_linked - final_set_message_len),
                n if n == max_linked => (without_last + 1, 0),
                _ => {
                    // the final set holds its single 'linked' fragment plus some 'unlinked' ones
                    let remaining_len = final_set_message_len - max_linked;

                    let quot = remaining_len / max_unlinked;
                    let rem = remaining_len % max_unlinked;

                    if rem == 0 {
                        (without_last + quot + 1, 0)
                    } else {
                        (without_last + quot + 2, max_unlinked - rem)
                    }
                }
            }
        }
    }
}
268
#[cfg(test)]
mod tests {
    use super::*;
    use crate::set::{max_one_way_linked_set_payload_length, two_way_linked_set_payload_length};
    use nym_sphinx_addressing::nodes::MAX_NODE_ADDRESS_UNPADDED_LEN;
    use nym_sphinx_params::packet_sizes::PacketSize;

    // Exercises `number_of_required_fragments` over single-set, two-set and many-set
    // messages, with both full and non-full final fragments.
    #[test]
    fn calculating_number_of_required_fragments() {
        // plaintext len should not affect this at all, but let's test it with something tiny
        // and reasonable
        let used_plaintext_len = PacketSize::default().plaintext_size()
            - PacketSize::AckPacket.size()
            - MAX_NODE_ADDRESS_UNPADDED_LEN;

        let plaintext_lens = vec![17, used_plaintext_len, 20, 42, 10000];
        // a full set always contains exactly u8::MAX (255) fragments
        const SET_LEN: usize = u8::MAX as usize;

        for plaintext_len in plaintext_lens {
            // per-fragment payload capacities and full-set payload capacities
            let unlinked_len = unlinked_fragment_payload_max_len(plaintext_len);
            let linked_len = linked_fragment_payload_max_len(plaintext_len);
            let full_edge_set = max_one_way_linked_set_payload_length(plaintext_len);
            let full_middle_set = two_way_linked_set_payload_length(plaintext_len);

            // single set, single fragment, 5 bytes of padding left
            let single_non_full_frag_message_len = unlinked_len - 5;
            let (frags, space_left) =
                number_of_required_fragments(single_non_full_frag_message_len, plaintext_len);
            assert_eq!(frags, 1);
            assert_eq!(space_left, unlinked_len - single_non_full_frag_message_len);

            // single set, single exactly-full fragment
            let single_full_frag_message_len = unlinked_len;
            let (frags, space_left) =
                number_of_required_fragments(single_full_frag_message_len, plaintext_len);
            assert_eq!(frags, 1);
            assert_eq!(space_left, 0);

            // one byte over a single fragment spills into a second, mostly-empty one
            let two_non_full_frags_len = unlinked_len + 1;
            let (frags, space_left) =
                number_of_required_fragments(two_non_full_frags_len, plaintext_len);
            assert_eq!(frags, 2);
            assert_eq!(space_left, unlinked_len - 1);

            let two_full_frags_len = 2 * unlinked_len;
            let (frags, space_left) =
                number_of_required_fragments(two_full_frags_len, plaintext_len);
            assert_eq!(frags, 2);
            assert_eq!(space_left, 0);

            // many fragments, still a single set (42 < SET_LEN)
            let multi_single_set_frags_non_full = unlinked_len * 42 - 5;
            let (frags, space_left) =
                number_of_required_fragments(multi_single_set_frags_non_full, plaintext_len);
            assert_eq!(frags, 42);
            assert_eq!(space_left, 5);

            let multi_single_set_frags_full = unlinked_len * 42;
            let (frags, space_left) =
                number_of_required_fragments(multi_single_set_frags_full, plaintext_len);
            assert_eq!(frags, 42);
            assert_eq!(space_left, 0);

            // two sets: a full edge set plus a single (non-full) linked fragment
            let two_set_one_non_full_frag = full_edge_set + linked_len - 1;
            let (frags, space_left) =
                number_of_required_fragments(two_set_one_non_full_frag, plaintext_len);
            assert_eq!(frags, SET_LEN + 1);
            assert_eq!(space_left, 1);

            let two_set_one_full_frag = full_edge_set + linked_len;
            let (frags, space_left) =
                number_of_required_fragments(two_set_one_full_frag, plaintext_len);
            assert_eq!(frags, SET_LEN + 1);
            assert_eq!(space_left, 0);

            // two sets: second set has the linked fragment plus 41 unlinked ones
            let two_set_multi_frags_non_full = full_edge_set + linked_len + unlinked_len * 41 - 5;
            let (frags, space_left) =
                number_of_required_fragments(two_set_multi_frags_non_full, plaintext_len);
            assert_eq!(frags, SET_LEN + 42);
            assert_eq!(space_left, 5);

            let two_set_multi_frags_full = full_edge_set + linked_len + unlinked_len * 41;
            let (frags, space_left) =
                number_of_required_fragments(two_set_multi_frags_full, plaintext_len);
            assert_eq!(frags, SET_LEN + 42);
            assert_eq!(space_left, 0);

            // ten sets: edge set + 8 two-way-linked middle sets + final linked fragment
            let ten_set_one_non_full_frag = full_edge_set + 8 * full_middle_set + linked_len - 1;
            let (frags, space_left) =
                number_of_required_fragments(ten_set_one_non_full_frag, plaintext_len);
            assert_eq!(frags, 9 * SET_LEN + 1);
            assert_eq!(space_left, 1);

            let ten_set_one_full_frag = full_edge_set + 8 * full_middle_set + linked_len;
            let (frags, space_left) =
                number_of_required_fragments(ten_set_one_full_frag, plaintext_len);
            assert_eq!(frags, 9 * SET_LEN + 1);
            assert_eq!(space_left, 0);

            // ten sets with a partially-filled final set of 42 fragments
            let ten_set_multi_frags_non_full =
                full_edge_set + 8 * full_middle_set + linked_len + 41 * unlinked_len - 5;
            let (frags, space_left) =
                number_of_required_fragments(ten_set_multi_frags_non_full, plaintext_len);
            assert_eq!(frags, 9 * SET_LEN + 42);
            assert_eq!(space_left, 5);

            let ten_set_multi_frags_full =
                full_edge_set + 8 * full_middle_set + linked_len + 41 * unlinked_len;
            let (frags, space_left) =
                number_of_required_fragments(ten_set_multi_frags_full, plaintext_len);
            assert_eq!(frags, 9 * SET_LEN + 42);
            assert_eq!(space_left, 0);
        }
    }
}