ssb_validate/
message_value.rs

//! Functions for validating message values (i.e. just the `value`, without `key` and `timestamp`).
2use rayon::prelude::*;
3use serde::{Deserialize, Serialize};
4use snafu::{ensure, OptionExt, ResultExt};
5use ssb_legacy_msg_data::{
6    json::from_slice,
7    value::{ContentValue, Value},
8    LegacyF64,
9};
10use ssb_multiformats::multihash::Multihash;
11
12use crate::error::{
13    AuthorsDidNotMatch, FirstMessageDidNotHavePreviousOfNull, FirstMessageDidNotHaveSequenceOfOne,
14    ForkedFeed, InvalidBase64, InvalidHashFunction, InvalidMessage, InvalidMessageValueLength,
15    InvalidMessageValueOrder, InvalidPreviousMessage, InvalidSequenceNumber, PreviousWasNull,
16    Result,
17};
18use crate::utils;
19
20/// Data type representing the `value` of a message object (`KVT`). More information concerning the
21/// data model can be found
22/// in the [`Metadata` documentation](https://spec.scuttlebutt.nz/feed/messages.html#metadata).
23#[derive(Serialize, Deserialize, Debug)]
24#[serde(deny_unknown_fields)]
25pub struct SsbMessageValue {
26    pub previous: Option<Multihash>,
27    pub author: String,
28    pub sequence: u64,
29    pub timestamp: LegacyF64,
30    pub hash: String,
31    pub content: ContentValue,
32    pub signature: String,
33}
34
35/// Batch validate a collection of message values, all by the same author, ordered by ascending sequence
36/// number, with no missing messages.
37///
38/// It expects the messages to be the JSON encoded message value of shape: `{
39/// previous: "",
40/// author: "",
41/// sequence: ...,
42/// timestamp: ...,
43/// content: {},
44/// signature: ""
45/// }`
46///
47/// This will mainly be useful during replication. Collect all the latest messages from a feed you're
48/// replicating and batch validate all the messages at once.
49///
50/// # Example
51///```
52///use ssb_validate::message_value::par_validate_message_value_hash_chain_of_feed;
53///let valid_message_1 = r##"{
54///  "previous": null,
55///  "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
56///  "sequence": 1,
57///  "timestamp": 1470186877575,
58///  "hash": "sha256",
59///  "content": {
60///    "type": "about",
61///    "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
62///    "name": "Piet"
63///  },
64///  "signature": "QJKWui3oyK6r5dH13xHkEVFhfMZDTXfK2tW21nyfheFClSf69yYK77Itj1BGcOimZ16pj9u3tMArLUCGSscqCQ==.sig.ed25519"
65///}"##;
66///let valid_message_2 = r##"{
67///  "previous": "%/v5mCnV/kmnVtnF3zXtD4tbzoEQo4kRq/0d/bgxP1WI=.sha256",
68///  "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
69///  "sequence": 2,
70///  "timestamp": 1470187292812,
71///  "hash": "sha256",
72///  "content": {
73///    "type": "about",
74///    "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
75///    "image": {
76///      "link": "&MxwsfZoq7X6oqnEX/TWIlAqd6S+jsUA6T1hqZYdl7RM=.sha256",
77///      "size": 642763,
78///      "type": "image/png",
79///      "width": 512,
80///      "height": 512
81///    }
82///  },
83///  "signature": "j3C7Us3JDnSUseF4ycRB0dTMs0xC6NAriAFtJWvx2uyz0K4zSj6XL8YA4BVqv+AHgo08+HxXGrpJlZ3ADwNnDw==.sig.ed25519"
84///}"##;
85/// let messages = [valid_message_1.as_bytes(), valid_message_2.as_bytes()];
86/// // If you're passing `None` as the `previous` argument you'll need to give the compiler a hint about
87/// // the type.
88/// let result = par_validate_message_value_hash_chain_of_feed::<_, &[u8]>(&messages, None);
89/// assert!(result.is_ok());
90///```
91pub fn par_validate_message_value_hash_chain_of_feed<T: AsRef<[u8]>, U: AsRef<[u8]>>(
92    messages: &[T],
93    previous: Option<U>,
94) -> Result<()>
95where
96    [T]: ParallelSlice<T>,
97    T: Sync,
98    U: Sync + Send + Copy,
99{
100    messages
101        .par_iter()
102        .enumerate()
103        .try_fold(
104            || (),
105            |_, (idx, msg)| {
106                if idx == 0 {
107                    let prev = previous.map(|prev| prev.as_ref().to_owned());
108                    validate_message_value_hash_chain(msg.as_ref(), prev)
109                } else {
110                    validate_message_value_hash_chain(
111                        msg.as_ref(),
112                        Some(messages[idx - 1].as_ref()),
113                    )
114                }
115            },
116        )
117        .try_reduce(|| (), |_, _| Ok(()))
118}
119
120/// Validate a message value in relation to the previous message value.
121///
122/// It expects the messages to be the JSON encoded message value of shape: `{
123/// previous: "",
124/// author: "",
125/// sequence: ...,
126/// timestamp: ...,
127/// content: {},
128/// signature: ""
129/// }`
130///
131/// This checks that:
132/// - the sequence starts at one if it's the first message
133/// - the previous is correctly set to null if it's the first message
134/// - the sequence increments correctly
135/// - the author has not changed
136/// - the feed is not forked
137///
138/// This does not check:
139/// - the signature. See ssb-verify-signatures which lets you to batch verification of signatures.
140///
141/// `previous_msg_bytes` will be `None` only when `message_bytes` is the first message by that author.
142///
143/// # Example
144///```
145///use ssb_validate::message_value::validate_message_value_hash_chain;
146///let valid_message_1 = r##"{
147///  "previous": null,
148///  "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
149///  "sequence": 1,
150///  "timestamp": 1470186877575,
151///  "hash": "sha256",
152///  "content": {
153///    "type": "about",
154///    "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
155///    "name": "Piet"
156///  },
157///  "signature": "QJKWui3oyK6r5dH13xHkEVFhfMZDTXfK2tW21nyfheFClSf69yYK77Itj1BGcOimZ16pj9u3tMArLUCGSscqCQ==.sig.ed25519"
158///}"##;
159///let valid_message_2 = r##"{
160///  "previous": "%/v5mCnV/kmnVtnF3zXtD4tbzoEQo4kRq/0d/bgxP1WI=.sha256",
161///  "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
162///  "sequence": 2,
163///  "timestamp": 1470187292812,
164///  "hash": "sha256",
165///  "content": {
166///    "type": "about",
167///    "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
168///    "image": {
169///      "link": "&MxwsfZoq7X6oqnEX/TWIlAqd6S+jsUA6T1hqZYdl7RM=.sha256",
170///      "size": 642763,
171///      "type": "image/png",
172///      "width": 512,
173///      "height": 512
174///    }
175///  },
176///  "signature": "j3C7Us3JDnSUseF4ycRB0dTMs0xC6NAriAFtJWvx2uyz0K4zSj6XL8YA4BVqv+AHgo08+HxXGrpJlZ3ADwNnDw==.sig.ed25519"
177///}"##;
178///
179/// let result = validate_message_value_hash_chain(valid_message_2.as_bytes(),
180/// Some(valid_message_1.as_bytes()));
181/// assert!(result.is_ok());
182///```
183pub fn validate_message_value_hash_chain<T: AsRef<[u8]>, U: AsRef<[u8]>>(
184    message_bytes: T,
185    previous_msg_bytes: Option<U>,
186) -> Result<()> {
187    let message_bytes = message_bytes.as_ref();
188    // msg seq is 1 larger than previous
189    let (previous_value, previous_key) = match previous_msg_bytes {
190        Some(message) => {
191            let previous = from_slice::<SsbMessageValue>(message.as_ref()).context(
192                InvalidPreviousMessage {
193                    message: message.as_ref().to_owned(),
194                },
195            )?;
196            let previous_key = utils::multihash_from_bytes(message.as_ref());
197            (Some(previous), Some(previous_key))
198        }
199        None => (None, None),
200    };
201
202    let message_value = from_slice::<SsbMessageValue>(message_bytes).context(InvalidMessage {
203        message: message_bytes.to_owned(),
204    })?;
205
206    message_value_common_checks(
207        &message_value,
208        previous_value.as_ref(),
209        message_bytes,
210        previous_key.as_ref(),
211        // run checks for previous msg
212        true,
213    )?;
214
215    Ok(())
216}
217
218/// Validate a single message value (in isolation).
219///
220/// It expects the messages to be the JSON encoded message value of shape: `{
221/// previous: "",
222/// author: "",
223/// sequence: ...,
224/// timestamp: ...,
225/// content: {},
226/// signature: ""
227/// }`
228///
229/// This checks that:
230///
231/// - The fields (keys) of the message value are in the correct order
232/// - The hash signature is `sha256`
233/// - The message `content` is canonical base64 (if `content` is a string)
234/// - The message value does not exceed 8192 UTF-16 code units when serialized as a JSON string
235///
236/// This does not check:
237///
238/// - The signature. See ssb-verify-signatures which lets you to batch verification of signatures.
239/// - Anything to do with the `previous` message.
240pub fn validate_message_value<T: AsRef<[u8]>>(message_bytes: T) -> Result<()> {
241    let message_bytes = message_bytes.as_ref();
242    let message_value = from_slice::<SsbMessageValue>(message_bytes).context(InvalidMessage {
243        message: message_bytes.to_owned(),
244    })?;
245
246    // perform common validation checks without `previous` message
247    message_value_common_checks(&message_value, None, message_bytes, None, false)?;
248
249    Ok(())
250}
251
252/// Batch validate a collection of message values. Messages are not required to be in order or to
253/// be authored by a single identity.
254///
255/// It expects the messages to be the JSON encoded message value of shape: `{
256/// previous: "",
257/// author: "",
258/// sequence: ...,
259/// timestamp: ...,
260/// content: {},
261/// signature: ""
262/// }`
263pub fn par_validate_message_value<T: AsRef<[u8]>>(messages: &[T]) -> Result<()>
264where
265    [T]: ParallelSlice<T>,
266    T: Sync,
267{
268    messages
269        .par_iter()
270        .enumerate()
271        .try_fold(|| (), |_, (_idx, msg)| validate_message_value(msg.as_ref()))
272        .try_reduce(|| (), |_, _| Ok(()))
273}
274
275/// Validate an out-of-order message value.
276///
277/// It expects the messages to be the JSON encoded message value of shape: `{
278/// previous: "",
279/// author: "",
280/// sequence: ...,
281/// timestamp: ...,
282/// content: {},
283/// signature: ""
284/// }`
285///
286/// This checks that:
287///
288/// - The fields (keys) of the message value are in the correct order
289/// - The hash signature is `sha256`
290/// - The message `content` is canonical base64 (if `content` is a string)
291/// - The message value does not exceed 8192 UTF-16 code units when serialized as a JSON string
292/// - The `author` of the message value matches the `author` of the previous message value
293///
294/// This does not check:
295///
296/// - The signature. See ssb-verify-signatures which lets you to batch verification of signatures
297/// - The `sequence` of the message in relation to the previous message
298/// - That the hash value for `previous` matches the actual hashed value of the previous message
299pub fn validate_ooo_message_value_hash_chain<T: AsRef<[u8]>, U: AsRef<[u8]>>(
300    message_bytes: T,
301    previous_msg_bytes: Option<U>,
302) -> Result<()> {
303    let message_bytes = message_bytes.as_ref();
304    // we need the value of the `previous` msg to check that the author has not changed
305    let previous_value = match previous_msg_bytes {
306        Some(message) => {
307            let previous = from_slice::<SsbMessageValue>(message.as_ref()).context(
308                InvalidPreviousMessage {
309                    message: message.as_ref().to_owned(),
310                },
311            )?;
312            Some(previous)
313        }
314        None => (None),
315    };
316
317    let message_value = from_slice::<SsbMessageValue>(message_bytes).context(InvalidMessage {
318        message: message_bytes.to_owned(),
319    })?;
320
321    // perform common validation checks without `previous` message
322    message_value_common_checks(&message_value, None, message_bytes, None, false)?;
323
324    if let Some(previous_value) = previous_value.as_ref() {
325        // The authors are not allowed to change in a feed.
326        ensure!(
327            message_value.author == previous_value.author,
328            AuthorsDidNotMatch {
329                previous_author: previous_value.author.clone(),
330                author: message_value.author
331            }
332        );
333    }
334
335    Ok(())
336}
337
338/// Batch validate an out-of-order collection of message values from a single author.
339///
340/// It expects the messages to be the JSON encoded message value of shape: `{
341/// previous: "",
342/// author: "",
343/// sequence: ...,
344/// timestamp: ...,
345/// content: {},
346/// signature: ""
347/// }`
348pub fn par_validate_ooo_message_value_hash_chain_of_feed<T: AsRef<[u8]>, U: AsRef<[u8]>>(
349    messages: &[T],
350    previous: Option<U>,
351) -> Result<()>
352where
353    [T]: ParallelSlice<T>,
354    T: Sync,
355    U: Sync + Send + Copy,
356{
357    messages
358        .par_iter()
359        .enumerate()
360        .try_fold(
361            || (),
362            |_, (idx, msg)| {
363                if idx == 0 {
364                    let prev = previous.map(|prev| prev.as_ref().to_owned());
365                    validate_ooo_message_value_hash_chain(msg.as_ref(), prev)
366                } else {
367                    validate_ooo_message_value_hash_chain(
368                        msg.as_ref(),
369                        Some(messages[idx - 1].as_ref()),
370                    )
371                }
372            },
373        )
374        .try_reduce(|| (), |_, _| Ok(()))
375}
376
377/// Validation checks which are common across all contexts. The `check_previous` argument is used
378/// to control checks for the optional `previous_value` and `previous_key` parameters.
379pub fn message_value_common_checks(
380    message_value: &SsbMessageValue,
381    previous_value: Option<&SsbMessageValue>,
382    message_bytes: &[u8],
383    previous_key: Option<&Multihash>,
384    check_previous: bool,
385) -> Result<()> {
386    // The message value fields are in the correct order.
387    ensure!(
388        utils::is_correct_order(message_bytes),
389        InvalidMessageValueOrder {
390            message: message_bytes.to_owned()
391        }
392    );
393
394    // The hash signature must be `sha256`.
395    ensure!(
396        message_value.hash == "sha256",
397        InvalidHashFunction {
398            message: message_bytes.to_owned()
399        }
400    );
401
402    // The message `content` string must be canonical base64.
403    if let Value::String(private_msg) = &message_value.content.0 {
404        ensure!(
405            utils::is_canonical_base64(private_msg),
406            InvalidBase64 {
407                message: message_bytes,
408            }
409        );
410    }
411
412    if check_previous {
413        if let Some(previous_value) = previous_value {
414            // The authors are not allowed to change in a feed.
415            ensure!(
416                message_value.author == previous_value.author,
417                AuthorsDidNotMatch {
418                    previous_author: previous_value.author.clone(),
419                    author: message_value.author.clone()
420                }
421            );
422
423            // The sequence must increase by one.
424            let expected_sequence = previous_value.sequence + 1;
425            ensure!(
426                message_value.sequence == expected_sequence,
427                InvalidSequenceNumber {
428                    message: message_bytes.to_owned(),
429                    actual: message_value.sequence,
430                    expected: expected_sequence
431                }
432            );
433
434            // msg previous must match hash of previous.value otherwise it's a fork.
435            ensure!(
436                message_value.previous.as_ref().context(PreviousWasNull)?
437                    == previous_key.expect("expected the previous key to be Some(key), was None"),
438                ForkedFeed {
439                    previous_seq: previous_value.sequence
440                }
441            );
442        } else {
443            // This message is the first message.
444
445            // Sequence must be 1.
446            ensure!(
447                message_value.sequence == 1,
448                FirstMessageDidNotHaveSequenceOfOne {
449                    message: message_bytes.to_owned()
450                }
451            );
452            // Previous must be None.
453            ensure!(
454                message_value.previous.is_none(),
455                FirstMessageDidNotHavePreviousOfNull {
456                    message: message_bytes.to_owned()
457                }
458            );
459        };
460    }
461
462    // The message `value` length must be less than 8192 UTF-16 code units.
463    // We check this last since serialization is expensive.
464    ensure!(
465        utils::is_correct_length(message_value)?,
466        InvalidMessageValueLength {
467            message: message_bytes.to_owned()
468        }
469    );
470
471    Ok(())
472}
473
#[cfg(test)]
mod tests {
    use crate::message_value::{
        par_validate_message_value, par_validate_message_value_hash_chain_of_feed,
        par_validate_ooo_message_value_hash_chain_of_feed, validate_message_value,
        validate_message_value_hash_chain, validate_ooo_message_value_hash_chain,
    };
    use crate::test_data::{
        MESSAGE_VALUE_1, MESSAGE_VALUE_2, MESSAGE_VALUE_3, MESSAGE_VALUE_3_INCORRECT_AUTHOR,
    };

    // The first message of a feed validates without a predecessor.
    #[test]
    fn it_works_first_message_value() {
        let result =
            validate_message_value_hash_chain::<_, &[u8]>(MESSAGE_VALUE_1.as_bytes(), None);
        assert!(result.is_ok());
    }

    // The second message validates against the first one.
    #[test]
    fn it_works_second_message_value() {
        let result = validate_message_value_hash_chain(
            MESSAGE_VALUE_2.as_bytes(),
            Some(MESSAGE_VALUE_1.as_bytes()),
        );
        assert!(result.is_ok());
    }

    // An ordered run of messages validates as a whole feed.
    #[test]
    fn it_validates_an_ordered_sequence_of_message_values_in_parallel() {
        let feed = [
            MESSAGE_VALUE_1.as_bytes(),
            MESSAGE_VALUE_2.as_bytes(),
            MESSAGE_VALUE_3.as_bytes(),
        ];
        let result = par_validate_message_value_hash_chain_of_feed::<_, &[u8]>(&feed, None);
        assert!(result.is_ok());
    }

    // A message that is not the first of its feed validates in isolation.
    #[test]
    fn it_validates_a_single_message_value() {
        let result = validate_message_value(MESSAGE_VALUE_2.as_bytes());
        assert!(result.is_ok());
    }

    // Independent validation of several messages at once.
    #[test]
    fn it_validates_message_values_in_parallel() {
        let batch = [MESSAGE_VALUE_1.as_bytes(), MESSAGE_VALUE_2.as_bytes()];
        let result = par_validate_message_value(&batch);
        assert!(result.is_ok());
    }

    // Out-of-order validation accepts a "previous" message with a higher sequence.
    #[test]
    fn it_validates_a_pair_of_ooo_message_values() {
        let result = validate_ooo_message_value_hash_chain(
            MESSAGE_VALUE_2.as_bytes(),
            Some(MESSAGE_VALUE_3.as_bytes()),
        );
        assert!(result.is_ok());
    }

    // Out-of-order batch validation accepts a shuffled feed.
    #[test]
    fn it_validates_ooo_message_values_in_parallel() {
        let shuffled = [
            MESSAGE_VALUE_3.as_bytes(),
            MESSAGE_VALUE_1.as_bytes(),
            MESSAGE_VALUE_2.as_bytes(),
        ];
        let result = par_validate_ooo_message_value_hash_chain_of_feed::<_, &[u8]>(&shuffled, None);
        assert!(result.is_ok());
    }

    // Independent validation does not compare authors between messages.
    #[test]
    fn it_validates_message_values_from_different_authors_in_parallel() {
        let mixed_authors = [
            MESSAGE_VALUE_2.as_bytes(),
            MESSAGE_VALUE_3_INCORRECT_AUTHOR.as_bytes(),
        ];
        let result = par_validate_message_value(&mixed_authors);
        assert!(result.is_ok());
    }
}