ssb_legacy_msg_data/
value.rs

1//! Data structures for storing and manipulating arbitrary legacy data.
2
3use std::borrow::Borrow;
4use std::cmp::Ordering;
5use std::collections::{btree_map, BTreeMap};
6use std::fmt;
7
8use indexmap::{map, IndexMap};
9use serde::{
10    de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor},
11    ser::{Serialize, SerializeMap, SerializeSeq, Serializer},
12};
13
14use super::{legacy_length, LegacyF64};
15
16// The maximum capacity of entries to preallocate for arrays and objects. Even if malicious input
17// claims to contain a much larger collection, only this much memory will be blindly allocated.
18static MAX_ALLOC: usize = 2048;
19
20/// Represents any valid ssb legacy message value, preserving the order of object entries.
21#[derive(PartialEq, Eq, Debug, Clone)]
22pub enum Value {
23    /// The [null](https://spec.scuttlebutt.nz/feed/datamodel.html#null) value.
24    Null,
25    /// A [boolean](https://spec.scuttlebutt.nz/feed/datamodel.html#booleans).
26    Bool(bool),
27    /// A [float](https://spec.scuttlebutt.nz/feed/datamodel.html#floats).
28    Float(LegacyF64),
29    /// A [string](https://spec.scuttlebutt.nz/feed/datamodel.html#strings).
30    String(String),
31    /// An [array](https://spec.scuttlebutt.nz/feed/datamodel.html#arrays).
32    Array(Vec<Value>),
33    /// An [object](https://spec.scuttlebutt.nz/feed/datamodel.html#objects).
34    Object(RidiculousStringMap<Value>),
35}
36
37impl Serialize for Value {
38    #[inline]
39    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
40    where
41        S: Serializer,
42    {
43        match *self {
44            Value::Null => serializer.serialize_unit(),
45            Value::Bool(b) => serializer.serialize_bool(b),
46            Value::Float(f) => serializer.serialize_f64(f.into()),
47            Value::String(ref s) => serializer.serialize_str(s),
48            Value::Array(ref v) => {
49                let mut s = serializer.serialize_seq(Some(v.len()))?;
50                for inner in v {
51                    s.serialize_element(inner)?;
52                }
53                s.end()
54            }
55            Value::Object(ref m) => {
56                let mut s = serializer.serialize_map(Some(m.len()))?;
57                for (key, value) in m {
58                    s.serialize_entry(&key, value)?;
59                }
60                s.end()
61            }
62        }
63    }
64}
65
66impl<'de> Deserialize<'de> for Value {
67    fn deserialize<D>(deserializer: D) -> Result<Value, D::Error>
68    where
69        D: Deserializer<'de>,
70    {
71        deserializer.deserialize_any(ValueVisitor)
72    }
73}
74
75struct ValueVisitor;
76
77impl<'de> Visitor<'de> for ValueVisitor {
78    type Value = Value;
79
80    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
81        formatter.write_str("any valid legacy ssb value")
82    }
83
84    fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E> {
85        Ok(Value::Bool(v))
86    }
87
88    fn visit_f64<E: Error>(self, v: f64) -> Result<Self::Value, E> {
89        match LegacyF64::from_f64(v) {
90            Some(f) => Ok(Value::Float(f)),
91            None => Err(E::custom("invalid float")),
92        }
93    }
94
95    fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
96        self.visit_string(v.to_string())
97    }
98
99    fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
100        Ok(Value::String(v))
101    }
102
103    fn visit_unit<E>(self) -> Result<Self::Value, E> {
104        Ok(Value::Null)
105    }
106
107    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
108    where
109        A: SeqAccess<'de>,
110    {
111        // use the size hint, but put a maximum to the allocation because we can't trust the input
112        let mut v = Vec::with_capacity(std::cmp::min(seq.size_hint().unwrap_or(0), MAX_ALLOC));
113
114        while let Some(inner) = seq.next_element()? {
115            v.push(inner);
116        }
117
118        Ok(Value::Array(v))
119    }
120
121    fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
122    where
123        A: MapAccess<'de>,
124    {
125        // use the size hint, but put a maximum to the allocation because we can't trust the input
126        let mut m = RidiculousStringMap::with_capacity(std::cmp::min(
127            map.size_hint().unwrap_or(0),
128            MAX_ALLOC,
129        ));
130
131        while let Some((key, val)) = map.next_entry()? {
132            if m.insert(key, val).is_some() {
133                return Err(A::Error::custom("map had duplicate key"));
134            }
135        }
136
137        Ok(Value::Object(m))
138    }
139}
140
141/// Represents any valid ssb legacy message value that can be used as the content of a message,
142/// preserving the order of object entries.
143///
144/// On deserialization, this enforces that the value is an object with a correct `type` entry.
145#[derive(PartialEq, Eq, Debug, Clone)]
146pub struct ContentValue(pub Value);
147
148impl Serialize for ContentValue {
149    #[inline]
150    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
151    where
152        S: Serializer,
153    {
154        self.0.serialize(serializer)
155    }
156}
157
158impl<'de> Deserialize<'de> for ContentValue {
159    fn deserialize<D>(deserializer: D) -> Result<ContentValue, D::Error>
160    where
161        D: Deserializer<'de>,
162    {
163        // allowable types include String and Map so we let the input determine the applicable
164        // deserialization method to use
165        deserializer.deserialize_any(ContentValueVisitor::new())
166    }
167}
168
169struct ContentValueVisitor(bool);
170
171impl ContentValueVisitor {
172    fn new() -> ContentValueVisitor {
173        ContentValueVisitor(true)
174    }
175}
176
177impl<'de> Visitor<'de> for ContentValueVisitor {
178    type Value = ContentValue;
179
180    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
181        formatter.write_str("a string or map of valid legacy ssb values")
182    }
183
184    fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
185        self.visit_string(v.to_string())
186    }
187
188    fn visit_string<E: Error>(self, v: String) -> Result<Self::Value, E> {
189        Ok(ContentValue(Value::String(v)))
190    }
191
192    fn visit_map<A>(mut self, mut map: A) -> Result<Self::Value, A::Error>
193    where
194        A: MapAccess<'de>,
195    {
196        // use the size hint, but put a maximum to the allocation because we can't trust the input
197        let mut m = RidiculousStringMap::with_capacity(std::cmp::min(
198            map.size_hint().unwrap_or(0),
199            MAX_ALLOC,
200        ));
201
202        while let Some((key, val)) = map.next_entry::<String, Value>()? {
203            if self.0 && key == "type" {
204                match val {
205                    Value::String(ref type_str) => {
206                        if check_type_value(type_str) {
207                            self.0 = false;
208                        } else {
209                            return Err(A::Error::custom("content had invalid type"));
210                        }
211                    }
212                    _ => return Err(A::Error::custom("content type must be a string")),
213                }
214            }
215
216            if m.insert(key, val).is_some() {
217                return Err(A::Error::custom("map had duplicate key"));
218            }
219        }
220
221        if self.0 {
222            return Err(A::Error::custom("content had no `type` entry"));
223        }
224
225        Ok(ContentValue(Value::Object(m)))
226    }
227}
228
229/// Check whether the given string is a valid `type` value of a content object.
230fn check_type_value(s: &str) -> bool {
231    let len = legacy_length(s);
232
233    (3..=52).contains(&len)
234}
235
236/// A map with string keys that sorts strings according to
237/// [object entry order](https://spec.scuttlebutt.nz/feed/datamodel.html#signing-encoding-objects),
238/// using insertion order for non-int keys.
239#[derive(Debug, PartialEq, Eq, Clone, Default)]
240pub struct RidiculousStringMap<V> {
241    // Keys that parse as natural numbers strictly less than 2^32 - 1, sorted numerically.
242    naturals: BTreeMap<GraphicolexicalString, V>,
243    // The remaining keys, sorted in insertion order.
244    others: IndexMap<String, V>,
245}
246
247impl<V> RidiculousStringMap<V> {
248    /// Create a new map with capacity for `n` key-value pairs. (Does not
249    /// allocate if `n` is zero.)
250    ///
251    /// This only preallocates capacity for non-numeric strings.
252    pub fn with_capacity(capacity: usize) -> RidiculousStringMap<V> {
253        RidiculousStringMap {
254            naturals: BTreeMap::new(),
255            others: IndexMap::with_capacity(capacity),
256        }
257    }
258
259    /// Check if the map is empty.
260    pub fn is_empty(&self) -> bool {
261        self.naturals.is_empty() && self.others.is_empty()
262    }
263
264    /// Returns the number of elements in the map.
265    pub fn len(&self) -> usize {
266        self.naturals.len() + self.others.len()
267    }
268
269    /// Inserts a key-value pair into the map.
270    ///
271    /// If the map did not have this key present, `None` is returned.
272    ///
273    /// If the map did have this key present, the value is updated, and the old
274    /// value is returned. The key is not updated, though; this matters for
275    /// types that can be `==` without being identical.
276    pub fn insert(&mut self, key: String, val: V) -> Option<V> {
277        if is_int_str(&key) {
278            self.naturals.insert(GraphicolexicalString(key), val)
279        } else {
280            self.others.insert(key, val)
281        }
282    }
283
284    /// Deletes a key-value pair from the map.
285    ///
286    pub fn remove(&mut self, key: String) -> Option<V> {
287        if is_int_str(&key) {
288            self.naturals.remove(&GraphicolexicalString(key))
289        } else {
290            self.others.remove(&key)
291        }
292    }
293
294    /// Gets an iterator over the entries of the map. It first yields all entries with
295    /// [numeric](https://spec.scuttlebutt.nz/feed/datamodel.html#signing-encoding-objects) keys
296    /// in ascending order, and then the remaining entries in the same order in
297    /// which they were inserted.
298    pub fn iter(&self) -> Iter<V> {
299        Iter {
300            naturals: self.naturals.iter(),
301            others: self.others.iter(),
302            nats: true,
303        }
304    }
305
306    /// Returns a reference to the value corresponding to the key.
307    pub fn get(&self, key: &str) -> Option<&V> {
308        if is_int_str(key) {
309            self.naturals.get(key)
310        } else {
311            self.others.get(key)
312        }
313    }
314    /// Returns a reference to the value corresponding to the key.
315    pub fn get_mut(&mut self, key: &str) -> Option<&mut V> {
316        if is_int_str(key) {
317            self.naturals.get_mut(key)
318        } else {
319            self.others.get_mut(key)
320        }
321    }
322}
323
324fn is_int_str(s: &str) -> bool {
325    if s == "0" {
326        return true;
327    }
328
329    match s.as_bytes().split_first() {
330        Some((0x31..=0x39, tail)) => {
331            if tail.iter().all(|byte| *byte >= 0x30 && *byte <= 0x39) {
332                if tail.len() >= 10 {
333                    return false;
334                }
335
336                s.parse::<u64>().unwrap() < (std::u32::MAX as u64)
337            } else {
338                false
339            }
340        }
341        _ => false,
342    }
343}
344
345#[test]
346fn test_is_int_str() {
347    assert!(is_int_str("0"));
348    assert!(!is_int_str("01"));
349    assert!(!is_int_str("00"));
350    assert!(is_int_str("5"));
351    assert!(is_int_str("4294967294"));
352    assert!(!is_int_str("4294967295"));
353    assert!(!is_int_str("4294967296"));
354    assert!(!is_int_str("4294967297"));
355    assert!(!is_int_str("42949672930"));
356    assert!(!is_int_str("42949672940"));
357    assert!(is_int_str("429496729"));
358    assert!(!is_int_str("52949672940"));
359}
360
361impl<'a, V> IntoIterator for &'a RidiculousStringMap<V> {
362    type Item = (&'a String, &'a V);
363    type IntoIter = Iter<'a, V>;
364
365    fn into_iter(self) -> Iter<'a, V> {
366        self.iter()
367    }
368}
369
370/// An iterator over the entries of a [`RidiculousStringMap`](RidiculousStringMap), first
371/// yielding all entries with
372/// [numeric](https://spec.scuttlebutt.nz/feed/datamodel.html#signing-encoding-objects) keys
373/// in ascending order, and then yielding the remaining entries in the same order in
374/// which they were inserted into the map.
375pub struct Iter<'a, V> {
376    naturals: btree_map::Iter<'a, GraphicolexicalString, V>,
377    others: map::Iter<'a, String, V>,
378    nats: bool,
379}
380
381impl<'a, V> Iterator for Iter<'a, V> {
382    type Item = (&'a String, &'a V);
383
384    fn next(&mut self) -> Option<(&'a String, &'a V)> {
385        if self.nats {
386            match self.naturals.next() {
387                None => {
388                    self.nats = false;
389                    self.next()
390                }
391                Some((key, val)) => Some((&key.0, val)),
392            }
393        } else {
394            self.others.next()
395        }
396    }
397}
398
399// A wrapper around String, that compares by length first and uses lexicographical order as a
400// tie-breaker.
401#[derive(PartialEq, Eq, Clone, Hash)]
402struct GraphicolexicalString(String);
403
404impl PartialOrd for GraphicolexicalString {
405    fn partial_cmp(&self, other: &GraphicolexicalString) -> Option<Ordering> {
406        Some(self.cmp(other))
407    }
408}
409
410impl Ord for GraphicolexicalString {
411    fn cmp(&self, other: &GraphicolexicalString) -> Ordering {
412        match self.0.len().cmp(&other.0.len()) {
413            Ordering::Greater => Ordering::Greater,
414            Ordering::Less => Ordering::Less,
415            Ordering::Equal => self.0.cmp(&other.0),
416        }
417    }
418}
419
420impl fmt::Debug for GraphicolexicalString {
421    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
422        self.0.fmt(f)
423    }
424}
425
426impl Borrow<str> for GraphicolexicalString {
427    fn borrow(&self) -> &str {
428        self.0.borrow()
429    }
430}
431
432impl From<String> for GraphicolexicalString {
433    fn from(s: String) -> Self {
434        GraphicolexicalString(s)
435    }
436}
437
438impl From<GraphicolexicalString> for String {
439    fn from(s: GraphicolexicalString) -> Self {
440        s.0
441    }
442}