ssb_json_msg_data/
value.rs

1//! Data structures for storing and manipulating arbitrary legacy data.
2
3use std::borrow::Borrow;
4use std::cmp::Ordering;
5use std::collections::{btree_map, BTreeMap};
6use std::fmt;
7
8use indexmap::{map, IndexMap};
9use serde::{
10    de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor},
11    ser::{Serialize, SerializeMap, SerializeSeq, Serializer},
12};
13
14use super::{legacy_length, LegacyF64};
15
16// The maximum capacity of entries to preallocate for arrays and objects. Even if malicious input
17// claims to contain a much larger collection, only this much memory will be blindly allocated.
18static MAX_ALLOC: usize = 2048;
19
20/// Represents any valid ssb legacy message value, preserving the order of object entries.
21#[derive(PartialEq, Eq, Debug, Clone)]
22pub enum Value {
23    /// The [null](https://spec.scuttlebutt.nz/datamodel.html#null) value.
24    Null,
25    /// A [boolean](https://spec.scuttlebutt.nz/datamodel.html#booleans).
26    Bool(bool),
27    /// A [float](https://spec.scuttlebutt.nz/datamodel.html#floats).
28    Float(LegacyF64),
29    /// A [string](https://spec.scuttlebutt.nz/datamodel.html#strings).
30    String(String),
31    /// An [array](https://spec.scuttlebutt.nz/datamodel.html#arrays).
32    Array(Vec<Value>),
33    /// An [object](https://spec.scuttlebutt.nz/datamodel.html#objects).
34    Object(RidiculousStringMap<Value>),
35}
36
37impl Serialize for Value {
38    #[inline]
39    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
40    where
41        S: Serializer,
42    {
43        match *self {
44            Value::Null => serializer.serialize_unit(),
45            Value::Bool(b) => serializer.serialize_bool(b),
46            Value::Float(f) => serializer.serialize_f64(f.into()),
47            Value::String(ref s) => serializer.serialize_str(s),
48            Value::Array(ref v) => {
49                let mut s = serializer.serialize_seq(Some(v.len()))?;
50                for inner in v {
51                    s.serialize_element(inner)?;
52                }
53                s.end()
54            }
55            Value::Object(ref m) => {
56                let mut s = serializer.serialize_map(Some(m.len()))?;
57                for (key, value) in m {
58                    s.serialize_entry(&key, value)?;
59                }
60                s.end()
61            }
62        }
63    }
64}
65
66impl<'de> Deserialize<'de> for Value {
67    fn deserialize<D>(deserializer: D) -> Result<Value, D::Error>
68    where
69        D: Deserializer<'de>,
70    {
71        deserializer.deserialize_any(ValueVisitor)
72    }
73}
74
75struct ValueVisitor;
76
77impl<'de> Visitor<'de> for ValueVisitor {
78    type Value = Value;
79
80    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
81        formatter.write_str("any valid legacy ssb value")
82    }
83
84    fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E> {
85        Ok(Value::Bool(v))
86    }
87
88    fn visit_f64<E: Error>(self, v: f64) -> Result<Self::Value, E> {
89        match LegacyF64::from_f64(v) {
90            Some(f) => Ok(Value::Float(f)),
91            None => Err(E::custom("invalid float")),
92        }
93    }
94
95    fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
96        self.visit_string(v.to_string())
97    }
98
99    fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
100        Ok(Value::String(v))
101    }
102
103    fn visit_unit<E>(self) -> Result<Self::Value, E> {
104        Ok(Value::Null)
105    }
106
107    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
108    where
109        A: SeqAccess<'de>,
110    {
111        // use the size hint, but put a maximum to the allocation because we can't trust the input
112        let mut v = Vec::with_capacity(std::cmp::min(seq.size_hint().unwrap_or(0), MAX_ALLOC));
113
114        while let Some(inner) = seq.next_element()? {
115            v.push(inner);
116        }
117
118        Ok(Value::Array(v))
119    }
120
121    fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
122    where
123        A: MapAccess<'de>,
124    {
125        // use the size hint, but put a maximum to the allocation because we can't trust the input
126        let mut m = RidiculousStringMap::with_capacity(std::cmp::min(
127            map.size_hint().unwrap_or(0),
128            MAX_ALLOC,
129        ));
130
131        while let Some((key, val)) = map.next_entry()? {
132            if let Some(_) = m.insert(key, val) {
133                return Err(A::Error::custom("map had duplicate key"));
134            }
135        }
136
137        Ok(Value::Object(m))
138    }
139}
140
141/// Represents any valid ssb legacy message value that can be used as the content of a message,
142/// preserving the order of object entries.
143///
144/// On deserialization, this enforces that the value is an object with a correct `type` entry.
145#[derive(PartialEq, Eq, Debug, Clone)]
146pub struct ContentValue(pub Value);
147
148impl Serialize for ContentValue {
149    #[inline]
150    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
151    where
152        S: Serializer,
153    {
154        self.0.serialize(serializer)
155    }
156}
157
158impl<'de> Deserialize<'de> for ContentValue {
159    fn deserialize<D>(deserializer: D) -> Result<ContentValue, D::Error>
160    where
161        D: Deserializer<'de>,
162    {
163        deserializer.deserialize_map(ContentValueVisitor::new())
164    }
165}
166
167struct ContentValueVisitor(bool);
168
169impl ContentValueVisitor {
170    fn new() -> ContentValueVisitor {
171        ContentValueVisitor(true)
172    }
173}
174
175impl<'de> Visitor<'de> for ContentValueVisitor {
176    type Value = ContentValue;
177
178    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
179        formatter.write_str("any valid legacy ssb content value")
180    }
181
182    fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E> {
183        Ok(ContentValue(Value::Bool(v)))
184    }
185
186    fn visit_f64<E: Error>(self, v: f64) -> Result<Self::Value, E> {
187        match LegacyF64::from_f64(v) {
188            Some(f) => Ok(ContentValue(Value::Float(f))),
189            None => Err(E::custom("invalid float")),
190        }
191    }
192
193    fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
194        self.visit_string(v.to_string())
195    }
196
197    fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
198        Ok(ContentValue(Value::String(v)))
199    }
200
201    fn visit_unit<E>(self) -> Result<Self::Value, E> {
202        Ok(ContentValue(Value::Null))
203    }
204
205    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
206    where
207        A: SeqAccess<'de>,
208    {
209        // use the size hint, but put a maximum to the allocation because we can't trust the input
210        let mut v = Vec::with_capacity(std::cmp::min(seq.size_hint().unwrap_or(0), MAX_ALLOC));
211
212        while let Some(inner) = seq.next_element()? {
213            v.push(inner);
214        }
215
216        Ok(ContentValue(Value::Array(v)))
217    }
218
219    fn visit_map<A>(mut self, mut map: A) -> Result<Self::Value, A::Error>
220    where
221        A: MapAccess<'de>,
222    {
223        // use the size hint, but put a maximum to the allocation because we can't trust the input
224        let mut m = RidiculousStringMap::with_capacity(std::cmp::min(
225            map.size_hint().unwrap_or(0),
226            MAX_ALLOC,
227        ));
228
229        while let Some((key, val)) = map.next_entry::<String, Value>()? {
230            if self.0 && key == "type" {
231                match val {
232                    Value::String(ref type_str) => {
233                        if check_type_value(type_str) {
234                            self.0 = false;
235                        } else {
236                            return Err(A::Error::custom("content had invalid type"));
237                        }
238                    }
239                    _ => return Err(A::Error::custom("content type must be a string")),
240                }
241            }
242
243            if let Some(_) = m.insert(key, val) {
244                return Err(A::Error::custom("map had duplicate key"));
245            }
246        }
247
248        if self.0 {
249            return Err(A::Error::custom("content had no `type` entry"));
250        }
251
252        Ok(ContentValue(Value::Object(m)))
253    }
254}
255
256/// Check whether the given string is a valid `type` value of a content object.
257fn check_type_value(s: &str) -> bool {
258    let len = legacy_length(s);
259
260    if len < 3 || len > 53 {
261        false
262    } else {
263        true
264    }
265}
266
267/// A map with string keys that sorts strings according to
268/// [object entry order](https://spec.scuttlebutt.nz/datamodel.html#signing-encoding-objects),
269/// using insertion order for non-int keys.
270#[derive(Debug, PartialEq, Eq, Clone, Default)]
271pub struct RidiculousStringMap<V> {
272    // Keys that parse as natural numbers strictly less than 2^32 - 1, sorted numerically.
273    naturals: BTreeMap<GraphicolexicalString, V>,
274    // The remaining keys, sorted in insertion order.
275    others: IndexMap<String, V>,
276}
277
278impl<V> RidiculousStringMap<V> {
279    /// Create a new map with capacity for `n` key-value pairs. (Does not
280    /// allocate if `n` is zero.)
281    ///
282    /// This only preallocates capacity for non-numeric strings.
283    pub fn with_capacity(capacity: usize) -> RidiculousStringMap<V> {
284        RidiculousStringMap {
285            naturals: BTreeMap::new(),
286            others: IndexMap::with_capacity(capacity),
287        }
288    }
289
290    /// Returns the number of elements in the map.
291    pub fn len(&self) -> usize {
292        self.naturals.len() + self.others.len()
293    }
294
295    /// Inserts a key-value pair into the map.
296    ///
297    /// If the map did not have this key present, `None` is returned.
298    ///
299    /// If the map did have this key present, the value is updated, and the old
300    /// value is returned. The key is not updated, though; this matters for
301    /// types that can be `==` without being identical.
302    pub fn insert(&mut self, key: String, val: V) -> Option<V> {
303        if is_int_str(&key) {
304            self.naturals.insert(GraphicolexicalString(key), val)
305        } else {
306            self.others.insert(key, val)
307        }
308    }
309
310    /// Deletes a key-value pair from the map.
311    ///
312    pub fn remove(&mut self, key: String) -> Option<V> {
313        if is_int_str(&key) {
314            self.naturals.remove(&GraphicolexicalString(key))
315        } else {
316            self.others.remove(&key)
317        }
318    }
319
320    /// Gets an iterator over the entries of the map. It first yields all entries with
321    /// [numeric](https://spec.scuttlebutt.nz/datamodel.html#signing-encoding-objects) keys
322    /// in ascending order, and then the remaining entries in the same order in
323    /// which they were inserted.
324    pub fn iter(&self) -> Iter<V> {
325        Iter {
326            naturals: self.naturals.iter(),
327            others: self.others.iter(),
328            nats: true,
329        }
330    }
331
332    /// Returns a reference to the value corresponding to the key.
333    pub fn get(&self, key: &str) -> Option<&V> {
334        if is_int_str(key) {
335            self.naturals.get(key)
336        } else {
337            self.others.get(key)
338        }
339    }
340    /// Returns a reference to the value corresponding to the key.
341    pub fn get_mut(&mut self, key: &str) -> Option<&mut V> {
342        if is_int_str(key) {
343            self.naturals.get_mut(key)
344        } else {
345            self.others.get_mut(key)
346        }
347    }
348}
349
350fn is_int_str(s: &str) -> bool {
351    if s == "0" {
352        return true;
353    }
354
355    match s.as_bytes().split_first() {
356        Some((0x31..=0x39, tail)) => {
357            if tail.iter().all(|byte| *byte >= 0x30 && *byte <= 0x39) {
358                if tail.len() >= 10 {
359                    return false;
360                }
361
362                u64::from_str_radix(s, 10).unwrap() < (std::u32::MAX as u64)
363            } else {
364                false
365            }
366        }
367        _ => false,
368    }
369}
370
371#[test]
372fn test_is_int_str() {
373    assert!(is_int_str("0"));
374    assert!(!is_int_str("01"));
375    assert!(!is_int_str("00"));
376    assert!(is_int_str("5"));
377    assert!(is_int_str("4294967294"));
378    assert!(!is_int_str("4294967295"));
379    assert!(!is_int_str("4294967296"));
380    assert!(!is_int_str("4294967297"));
381    assert!(!is_int_str("42949672930"));
382    assert!(!is_int_str("42949672940"));
383    assert!(is_int_str("429496729"));
384    assert!(!is_int_str("52949672940"));
385}
386
387impl<'a, V> IntoIterator for &'a RidiculousStringMap<V> {
388    type Item = (&'a String, &'a V);
389    type IntoIter = Iter<'a, V>;
390
391    fn into_iter(self) -> Iter<'a, V> {
392        self.iter()
393    }
394}
395
396/// An iterator over the entries of a [`RidiculousStringMap`](RidiculousStringMap), first
397/// yielding all entries with
398/// [numeric](https://spec.scuttlebutt.nz/datamodel.html#signing-encoding-objects) keys
399/// in ascending order, and then yielding the remaining entries in the same order in
400/// which they were inserted into the map.
401pub struct Iter<'a, V> {
402    naturals: btree_map::Iter<'a, GraphicolexicalString, V>,
403    others: map::Iter<'a, String, V>,
404    nats: bool,
405}
406
407impl<'a, V> Iterator for Iter<'a, V> {
408    type Item = (&'a String, &'a V);
409
410    fn next(&mut self) -> Option<(&'a String, &'a V)> {
411        if self.nats {
412            match self.naturals.next() {
413                None => {
414                    self.nats = false;
415                    self.next()
416                }
417                Some((key, val)) => Some((&key.0, val)),
418            }
419        } else {
420            self.others.next()
421        }
422    }
423}
424
425// A wrapper around String, that compares by length first and uses lexicographical order as a
426// tie-breaker.
427#[derive(PartialEq, Eq, Clone, Hash)]
428struct GraphicolexicalString(String);
429
430impl PartialOrd for GraphicolexicalString {
431    fn partial_cmp(&self, other: &GraphicolexicalString) -> Option<Ordering> {
432        Some(self.cmp(other))
433    }
434}
435
436impl Ord for GraphicolexicalString {
437    fn cmp(&self, other: &GraphicolexicalString) -> Ordering {
438        match self.0.len().cmp(&other.0.len()) {
439            Ordering::Greater => Ordering::Greater,
440            Ordering::Less => Ordering::Less,
441            Ordering::Equal => self.0.cmp(&other.0),
442        }
443    }
444}
445
446impl fmt::Debug for GraphicolexicalString {
447    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
448        self.0.fmt(f)
449    }
450}
451
452impl Borrow<str> for GraphicolexicalString {
453    fn borrow(&self) -> &str {
454        self.0.borrow()
455    }
456}
457
458impl From<String> for GraphicolexicalString {
459    fn from(s: String) -> Self {
460        GraphicolexicalString(s)
461    }
462}
463
464impl From<GraphicolexicalString> for String {
465    fn from(s: GraphicolexicalString) -> Self {
466        s.0
467    }
468}