ssb_validate/
utils.rs

1//! Helper functions used during validation computations.
2use lazy_static::lazy_static;
3use regex::{bytes::Regex as RegexBytes, Regex};
4use sha2::{Digest, Sha256};
5use snafu::ResultExt;
6use ssb_legacy_msg_data::json;
7use ssb_multiformats::multihash::Multihash;
8
9use crate::error::{InvalidMessageCouldNotSerializeValue, Result};
10use crate::message_value::SsbMessageValue;
11
12/// Check that the given string represents canonical base64.
13///
14/// A Regex pattern is used to match on canonical base64 for private messages. This has been
15/// implemented according to the [`is-canonical-base64` JS module](https://www.npmjs.com/package/is-canonical-base64) by Dominic Tarr.
16pub fn is_canonical_base64(private_msg: &str) -> bool {
17    lazy_static! {
18        static ref RE: Regex = Regex::new(r"^(?:[a-zA-Z0-9/+]{4})*(?:[a-zA-Z0-9/+](?:(?:[AQgw]==)|(?:[a-zA-Z0-9/+][AEIMQUYcgkosw048]=)))?.box.*$").unwrap();
19    }
20    RE.is_match(private_msg)
21}
22
23/// Check that the length of the given message - when serialized as JSON - is less than 8192 UTF-16 code units.
24pub fn is_correct_length(msg_value: &SsbMessageValue) -> Result<bool> {
25    // the second arg is used to set `compact` to `false` (preserves whitespace)
26    let msg_value_str =
27        json::to_string(msg_value, false).context(InvalidMessageCouldNotSerializeValue)?;
28    let msg_len: usize = msg_value_str.chars().map(|ch| ch.len_utf16()).sum();
29    if msg_len > 8192 {
30        Ok(false)
31    } else {
32        Ok(true)
33    }
34}
35
36/// Check that the top-level fields (keys) comprising the given message value are in the correct
37/// order.
38///
39/// The message value is expected to be provided in the form of a byte array. A regular expression
40/// is used to match on the order of the fields. The order of the second and third fields (`"author"` and
41/// `"sequence"`) can be reversed. For more information on this and other quirks, you may wish to peruse the issues and code for the JavaScript [ssb-validate library](https://github.com/ssb-js/ssb-validate).
42pub fn is_correct_order(bytes: &[u8]) -> bool {
43    lazy_static! {
44        static ref RE_B: RegexBytes = RegexBytes::new(r#""previous"[\s\S]*("author"|"sequence")[\s\S]*("author"|"sequence")[\s\S]*"timestamp"[\s\S]*"hash"[\s\S]*"content"[\s\S]*"signature""#).unwrap();
45    }
46    RE_B.is_match(bytes)
47}
48
49/// Generate a hash for a given message value.
50///
51/// The message value is expected to be provided in the form of a byte array. The string of the
52/// bytes is first encoded to UTF-16 before the hash is computed. Note that the hash is
53/// this case is sometimes referred to as a `key` (as in, `KVT` - key, value, timestamp) or as a
54/// `Multihash`. More information can be found in the [`Multihash` documentation](https://spec.scuttlebutt.nz/feed/datatypes.html#multihash).
55pub fn multihash_from_bytes(bytes: &[u8]) -> Multihash {
56    let value_bytes_latin = node_buffer_binary_serializer(std::str::from_utf8(bytes).unwrap());
57    let value_hash = Sha256::digest(value_bytes_latin.as_slice());
58    Multihash::Message(value_hash.into())
59}
60
/// Encode text the way Node.js encodes a string into a buffer with the `'binary'` encoding.
///
/// The text is converted to UTF-16 and only the low byte of each code unit is kept, so the
/// output contains exactly one byte per UTF-16 code unit (two bytes for characters outside the
/// Basic Multilingual Plane, one for everything else).
///
/// This matters because ssb encodes messages differently for hashing than it does for
/// signing; this function provides the encoding used for hashing.
pub fn node_buffer_binary_serializer(text: &str) -> Vec<u8> {
    // A UTF-16 code unit never corresponds to less than one UTF-8 byte, so the UTF-8 length is
    // an upper bound on the number of output bytes.
    let mut low_bytes = Vec::with_capacity(text.len());
    for unit in text.encode_utf16() {
        // Index 0 of the little-endian representation is the least significant byte.
        low_bytes.push(unit.to_le_bytes()[0]);
    }
    low_bytes
}
70}