ssb_validate/message_value.rs
//! Functions for validating message values (i.e. just the `value`, without the `key` and `timestamp`).
2use rayon::prelude::*;
3use serde::{Deserialize, Serialize};
4use snafu::{ensure, OptionExt, ResultExt};
5use ssb_legacy_msg_data::{
6 json::from_slice,
7 value::{ContentValue, Value},
8 LegacyF64,
9};
10use ssb_multiformats::multihash::Multihash;
11
12use crate::error::{
13 AuthorsDidNotMatch, FirstMessageDidNotHavePreviousOfNull, FirstMessageDidNotHaveSequenceOfOne,
14 ForkedFeed, InvalidBase64, InvalidHashFunction, InvalidMessage, InvalidMessageValueLength,
15 InvalidMessageValueOrder, InvalidPreviousMessage, InvalidSequenceNumber, PreviousWasNull,
16 Result,
17};
18use crate::utils;
19
20/// Data type representing the `value` of a message object (`KVT`). More information concerning the
21/// data model can be found
22/// in the [`Metadata` documentation](https://spec.scuttlebutt.nz/feed/messages.html#metadata).
23#[derive(Serialize, Deserialize, Debug)]
24#[serde(deny_unknown_fields)]
25pub struct SsbMessageValue {
26 pub previous: Option<Multihash>,
27 pub author: String,
28 pub sequence: u64,
29 pub timestamp: LegacyF64,
30 pub hash: String,
31 pub content: ContentValue,
32 pub signature: String,
33}
34
35/// Batch validate a collection of message values, all by the same author, ordered by ascending sequence
36/// number, with no missing messages.
37///
38/// It expects the messages to be the JSON encoded message value of shape: `{
39/// previous: "",
40/// author: "",
41/// sequence: ...,
42/// timestamp: ...,
43/// content: {},
44/// signature: ""
45/// }`
46///
47/// This will mainly be useful during replication. Collect all the latest messages from a feed you're
48/// replicating and batch validate all the messages at once.
49///
50/// # Example
51///```
52///use ssb_validate::message_value::par_validate_message_value_hash_chain_of_feed;
53///let valid_message_1 = r##"{
54/// "previous": null,
55/// "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
56/// "sequence": 1,
57/// "timestamp": 1470186877575,
58/// "hash": "sha256",
59/// "content": {
60/// "type": "about",
61/// "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
62/// "name": "Piet"
63/// },
64/// "signature": "QJKWui3oyK6r5dH13xHkEVFhfMZDTXfK2tW21nyfheFClSf69yYK77Itj1BGcOimZ16pj9u3tMArLUCGSscqCQ==.sig.ed25519"
65///}"##;
66///let valid_message_2 = r##"{
67/// "previous": "%/v5mCnV/kmnVtnF3zXtD4tbzoEQo4kRq/0d/bgxP1WI=.sha256",
68/// "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
69/// "sequence": 2,
70/// "timestamp": 1470187292812,
71/// "hash": "sha256",
72/// "content": {
73/// "type": "about",
74/// "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
75/// "image": {
76/// "link": "&MxwsfZoq7X6oqnEX/TWIlAqd6S+jsUA6T1hqZYdl7RM=.sha256",
77/// "size": 642763,
78/// "type": "image/png",
79/// "width": 512,
80/// "height": 512
81/// }
82/// },
83/// "signature": "j3C7Us3JDnSUseF4ycRB0dTMs0xC6NAriAFtJWvx2uyz0K4zSj6XL8YA4BVqv+AHgo08+HxXGrpJlZ3ADwNnDw==.sig.ed25519"
84///}"##;
85/// let messages = [valid_message_1.as_bytes(), valid_message_2.as_bytes()];
86/// // If you're passing `None` as the `previous` argument you'll need to give the compiler a hint about
87/// // the type.
88/// let result = par_validate_message_value_hash_chain_of_feed::<_, &[u8]>(&messages, None);
89/// assert!(result.is_ok());
90///```
91pub fn par_validate_message_value_hash_chain_of_feed<T: AsRef<[u8]>, U: AsRef<[u8]>>(
92 messages: &[T],
93 previous: Option<U>,
94) -> Result<()>
95where
96 [T]: ParallelSlice<T>,
97 T: Sync,
98 U: Sync + Send + Copy,
99{
100 messages
101 .par_iter()
102 .enumerate()
103 .try_fold(
104 || (),
105 |_, (idx, msg)| {
106 if idx == 0 {
107 let prev = previous.map(|prev| prev.as_ref().to_owned());
108 validate_message_value_hash_chain(msg.as_ref(), prev)
109 } else {
110 validate_message_value_hash_chain(
111 msg.as_ref(),
112 Some(messages[idx - 1].as_ref()),
113 )
114 }
115 },
116 )
117 .try_reduce(|| (), |_, _| Ok(()))
118}
119
120/// Validate a message value in relation to the previous message value.
121///
122/// It expects the messages to be the JSON encoded message value of shape: `{
123/// previous: "",
124/// author: "",
125/// sequence: ...,
126/// timestamp: ...,
127/// content: {},
128/// signature: ""
129/// }`
130///
131/// This checks that:
132/// - the sequence starts at one if it's the first message
133/// - the previous is correctly set to null if it's the first message
134/// - the sequence increments correctly
135/// - the author has not changed
136/// - the feed is not forked
137///
138/// This does not check:
139/// - the signature. See ssb-verify-signatures which lets you to batch verification of signatures.
140///
141/// `previous_msg_bytes` will be `None` only when `message_bytes` is the first message by that author.
142///
143/// # Example
144///```
145///use ssb_validate::message_value::validate_message_value_hash_chain;
146///let valid_message_1 = r##"{
147/// "previous": null,
148/// "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
149/// "sequence": 1,
150/// "timestamp": 1470186877575,
151/// "hash": "sha256",
152/// "content": {
153/// "type": "about",
154/// "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
155/// "name": "Piet"
156/// },
157/// "signature": "QJKWui3oyK6r5dH13xHkEVFhfMZDTXfK2tW21nyfheFClSf69yYK77Itj1BGcOimZ16pj9u3tMArLUCGSscqCQ==.sig.ed25519"
158///}"##;
159///let valid_message_2 = r##"{
160/// "previous": "%/v5mCnV/kmnVtnF3zXtD4tbzoEQo4kRq/0d/bgxP1WI=.sha256",
161/// "author": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
162/// "sequence": 2,
163/// "timestamp": 1470187292812,
164/// "hash": "sha256",
165/// "content": {
166/// "type": "about",
167/// "about": "@U5GvOKP/YUza9k53DSXxT0mk3PIrnyAmessvNfZl5E0=.ed25519",
168/// "image": {
169/// "link": "&MxwsfZoq7X6oqnEX/TWIlAqd6S+jsUA6T1hqZYdl7RM=.sha256",
170/// "size": 642763,
171/// "type": "image/png",
172/// "width": 512,
173/// "height": 512
174/// }
175/// },
176/// "signature": "j3C7Us3JDnSUseF4ycRB0dTMs0xC6NAriAFtJWvx2uyz0K4zSj6XL8YA4BVqv+AHgo08+HxXGrpJlZ3ADwNnDw==.sig.ed25519"
177///}"##;
178///
179/// let result = validate_message_value_hash_chain(valid_message_2.as_bytes(),
180/// Some(valid_message_1.as_bytes()));
181/// assert!(result.is_ok());
182///```
183pub fn validate_message_value_hash_chain<T: AsRef<[u8]>, U: AsRef<[u8]>>(
184 message_bytes: T,
185 previous_msg_bytes: Option<U>,
186) -> Result<()> {
187 let message_bytes = message_bytes.as_ref();
188 // msg seq is 1 larger than previous
189 let (previous_value, previous_key) = match previous_msg_bytes {
190 Some(message) => {
191 let previous = from_slice::<SsbMessageValue>(message.as_ref()).context(
192 InvalidPreviousMessage {
193 message: message.as_ref().to_owned(),
194 },
195 )?;
196 let previous_key = utils::multihash_from_bytes(message.as_ref());
197 (Some(previous), Some(previous_key))
198 }
199 None => (None, None),
200 };
201
202 let message_value = from_slice::<SsbMessageValue>(message_bytes).context(InvalidMessage {
203 message: message_bytes.to_owned(),
204 })?;
205
206 message_value_common_checks(
207 &message_value,
208 previous_value.as_ref(),
209 message_bytes,
210 previous_key.as_ref(),
211 // run checks for previous msg
212 true,
213 )?;
214
215 Ok(())
216}
217
218/// Validate a single message value (in isolation).
219///
220/// It expects the messages to be the JSON encoded message value of shape: `{
221/// previous: "",
222/// author: "",
223/// sequence: ...,
224/// timestamp: ...,
225/// content: {},
226/// signature: ""
227/// }`
228///
229/// This checks that:
230///
231/// - The fields (keys) of the message value are in the correct order
232/// - The hash signature is `sha256`
233/// - The message `content` is canonical base64 (if `content` is a string)
234/// - The message value does not exceed 8192 UTF-16 code units when serialized as a JSON string
235///
236/// This does not check:
237///
238/// - The signature. See ssb-verify-signatures which lets you to batch verification of signatures.
239/// - Anything to do with the `previous` message.
240pub fn validate_message_value<T: AsRef<[u8]>>(message_bytes: T) -> Result<()> {
241 let message_bytes = message_bytes.as_ref();
242 let message_value = from_slice::<SsbMessageValue>(message_bytes).context(InvalidMessage {
243 message: message_bytes.to_owned(),
244 })?;
245
246 // perform common validation checks without `previous` message
247 message_value_common_checks(&message_value, None, message_bytes, None, false)?;
248
249 Ok(())
250}
251
252/// Batch validate a collection of message values. Messages are not required to be in order or to
253/// be authored by a single identity.
254///
255/// It expects the messages to be the JSON encoded message value of shape: `{
256/// previous: "",
257/// author: "",
258/// sequence: ...,
259/// timestamp: ...,
260/// content: {},
261/// signature: ""
262/// }`
263pub fn par_validate_message_value<T: AsRef<[u8]>>(messages: &[T]) -> Result<()>
264where
265 [T]: ParallelSlice<T>,
266 T: Sync,
267{
268 messages
269 .par_iter()
270 .enumerate()
271 .try_fold(|| (), |_, (_idx, msg)| validate_message_value(msg.as_ref()))
272 .try_reduce(|| (), |_, _| Ok(()))
273}
274
275/// Validate an out-of-order message value.
276///
277/// It expects the messages to be the JSON encoded message value of shape: `{
278/// previous: "",
279/// author: "",
280/// sequence: ...,
281/// timestamp: ...,
282/// content: {},
283/// signature: ""
284/// }`
285///
286/// This checks that:
287///
288/// - The fields (keys) of the message value are in the correct order
289/// - The hash signature is `sha256`
290/// - The message `content` is canonical base64 (if `content` is a string)
291/// - The message value does not exceed 8192 UTF-16 code units when serialized as a JSON string
292/// - The `author` of the message value matches the `author` of the previous message value
293///
294/// This does not check:
295///
296/// - The signature. See ssb-verify-signatures which lets you to batch verification of signatures
297/// - The `sequence` of the message in relation to the previous message
298/// - That the hash value for `previous` matches the actual hashed value of the previous message
299pub fn validate_ooo_message_value_hash_chain<T: AsRef<[u8]>, U: AsRef<[u8]>>(
300 message_bytes: T,
301 previous_msg_bytes: Option<U>,
302) -> Result<()> {
303 let message_bytes = message_bytes.as_ref();
304 // we need the value of the `previous` msg to check that the author has not changed
305 let previous_value = match previous_msg_bytes {
306 Some(message) => {
307 let previous = from_slice::<SsbMessageValue>(message.as_ref()).context(
308 InvalidPreviousMessage {
309 message: message.as_ref().to_owned(),
310 },
311 )?;
312 Some(previous)
313 }
314 None => (None),
315 };
316
317 let message_value = from_slice::<SsbMessageValue>(message_bytes).context(InvalidMessage {
318 message: message_bytes.to_owned(),
319 })?;
320
321 // perform common validation checks without `previous` message
322 message_value_common_checks(&message_value, None, message_bytes, None, false)?;
323
324 if let Some(previous_value) = previous_value.as_ref() {
325 // The authors are not allowed to change in a feed.
326 ensure!(
327 message_value.author == previous_value.author,
328 AuthorsDidNotMatch {
329 previous_author: previous_value.author.clone(),
330 author: message_value.author
331 }
332 );
333 }
334
335 Ok(())
336}
337
338/// Batch validate an out-of-order collection of message values from a single author.
339///
340/// It expects the messages to be the JSON encoded message value of shape: `{
341/// previous: "",
342/// author: "",
343/// sequence: ...,
344/// timestamp: ...,
345/// content: {},
346/// signature: ""
347/// }`
348pub fn par_validate_ooo_message_value_hash_chain_of_feed<T: AsRef<[u8]>, U: AsRef<[u8]>>(
349 messages: &[T],
350 previous: Option<U>,
351) -> Result<()>
352where
353 [T]: ParallelSlice<T>,
354 T: Sync,
355 U: Sync + Send + Copy,
356{
357 messages
358 .par_iter()
359 .enumerate()
360 .try_fold(
361 || (),
362 |_, (idx, msg)| {
363 if idx == 0 {
364 let prev = previous.map(|prev| prev.as_ref().to_owned());
365 validate_ooo_message_value_hash_chain(msg.as_ref(), prev)
366 } else {
367 validate_ooo_message_value_hash_chain(
368 msg.as_ref(),
369 Some(messages[idx - 1].as_ref()),
370 )
371 }
372 },
373 )
374 .try_reduce(|| (), |_, _| Ok(()))
375}
376
377/// Validation checks which are common across all contexts. The `check_previous` argument is used
378/// to control checks for the optional `previous_value` and `previous_key` parameters.
379pub fn message_value_common_checks(
380 message_value: &SsbMessageValue,
381 previous_value: Option<&SsbMessageValue>,
382 message_bytes: &[u8],
383 previous_key: Option<&Multihash>,
384 check_previous: bool,
385) -> Result<()> {
386 // The message value fields are in the correct order.
387 ensure!(
388 utils::is_correct_order(message_bytes),
389 InvalidMessageValueOrder {
390 message: message_bytes.to_owned()
391 }
392 );
393
394 // The hash signature must be `sha256`.
395 ensure!(
396 message_value.hash == "sha256",
397 InvalidHashFunction {
398 message: message_bytes.to_owned()
399 }
400 );
401
402 // The message `content` string must be canonical base64.
403 if let Value::String(private_msg) = &message_value.content.0 {
404 ensure!(
405 utils::is_canonical_base64(private_msg),
406 InvalidBase64 {
407 message: message_bytes,
408 }
409 );
410 }
411
412 if check_previous {
413 if let Some(previous_value) = previous_value {
414 // The authors are not allowed to change in a feed.
415 ensure!(
416 message_value.author == previous_value.author,
417 AuthorsDidNotMatch {
418 previous_author: previous_value.author.clone(),
419 author: message_value.author.clone()
420 }
421 );
422
423 // The sequence must increase by one.
424 let expected_sequence = previous_value.sequence + 1;
425 ensure!(
426 message_value.sequence == expected_sequence,
427 InvalidSequenceNumber {
428 message: message_bytes.to_owned(),
429 actual: message_value.sequence,
430 expected: expected_sequence
431 }
432 );
433
434 // msg previous must match hash of previous.value otherwise it's a fork.
435 ensure!(
436 message_value.previous.as_ref().context(PreviousWasNull)?
437 == previous_key.expect("expected the previous key to be Some(key), was None"),
438 ForkedFeed {
439 previous_seq: previous_value.sequence
440 }
441 );
442 } else {
443 // This message is the first message.
444
445 // Sequence must be 1.
446 ensure!(
447 message_value.sequence == 1,
448 FirstMessageDidNotHaveSequenceOfOne {
449 message: message_bytes.to_owned()
450 }
451 );
452 // Previous must be None.
453 ensure!(
454 message_value.previous.is_none(),
455 FirstMessageDidNotHavePreviousOfNull {
456 message: message_bytes.to_owned()
457 }
458 );
459 };
460 }
461
462 // The message `value` length must be less than 8192 UTF-16 code units.
463 // We check this last since serialization is expensive.
464 ensure!(
465 utils::is_correct_length(message_value)?,
466 InvalidMessageValueLength {
467 message: message_bytes.to_owned()
468 }
469 );
470
471 Ok(())
472}
473
#[cfg(test)]
mod tests {
    // Unit tests for the validation entry points of this module, driven by the shared fixtures
    // in `crate::test_data`.
    use super::{
        par_validate_message_value, par_validate_message_value_hash_chain_of_feed,
        par_validate_ooo_message_value_hash_chain_of_feed, validate_message_value,
        validate_message_value_hash_chain, validate_ooo_message_value_hash_chain,
    };
    use crate::test_data::{
        MESSAGE_VALUE_1, MESSAGE_VALUE_2, MESSAGE_VALUE_3, MESSAGE_VALUE_3_INCORRECT_AUTHOR,
    };

    // A first message validates when no previous message is given.
    #[test]
    fn it_works_first_message_value() {
        let outcome =
            validate_message_value_hash_chain::<_, &[u8]>(MESSAGE_VALUE_1.as_bytes(), None);
        assert!(outcome.is_ok());
    }

    // A second message validates against the first one.
    #[test]
    fn it_works_second_message_value() {
        let current = MESSAGE_VALUE_2.as_bytes();
        let predecessor = MESSAGE_VALUE_1.as_bytes();
        let outcome = validate_message_value_hash_chain(current, Some(predecessor));
        assert!(outcome.is_ok());
    }

    // An ordered run of three messages validates as a chain.
    #[test]
    fn it_validates_an_ordered_sequence_of_message_values_in_parallel() {
        let feed = [
            MESSAGE_VALUE_1.as_bytes(),
            MESSAGE_VALUE_2.as_bytes(),
            MESSAGE_VALUE_3.as_bytes(),
        ];
        let outcome = par_validate_message_value_hash_chain_of_feed::<_, &[u8]>(&feed[..], None);
        assert!(outcome.is_ok());
    }

    // A message validates in isolation.
    #[test]
    fn it_validates_a_single_message_value() {
        let outcome = validate_message_value(MESSAGE_VALUE_2.as_bytes());
        assert!(outcome.is_ok());
    }

    // Several messages validate in isolation, in parallel.
    #[test]
    fn it_validates_message_values_in_parallel() {
        let batch = [MESSAGE_VALUE_1.as_bytes(), MESSAGE_VALUE_2.as_bytes()];
        let outcome = par_validate_message_value(&batch[..]);
        assert!(outcome.is_ok());
    }

    // Out-of-order validation accepts a "previous" message with a later sequence.
    #[test]
    fn it_validates_a_pair_of_ooo_message_values() {
        let current = MESSAGE_VALUE_2.as_bytes();
        let other = MESSAGE_VALUE_3.as_bytes();
        let outcome = validate_ooo_message_value_hash_chain(current, Some(other));
        assert!(outcome.is_ok());
    }

    // Out-of-order batch validation accepts a shuffled feed.
    #[test]
    fn it_validates_ooo_message_values_in_parallel() {
        let shuffled = [
            MESSAGE_VALUE_3.as_bytes(),
            MESSAGE_VALUE_1.as_bytes(),
            MESSAGE_VALUE_2.as_bytes(),
        ];
        let outcome =
            par_validate_ooo_message_value_hash_chain_of_feed::<_, &[u8]>(&shuffled[..], None);
        assert!(outcome.is_ok());
    }

    // Isolated batch validation does not compare the messages with one another.
    #[test]
    fn it_validates_message_values_from_different_authors_in_parallel() {
        let batch = [
            MESSAGE_VALUE_2.as_bytes(),
            MESSAGE_VALUE_3_INCORRECT_AUTHOR.as_bytes(),
        ];
        let outcome = par_validate_message_value(&batch[..]);
        assert!(outcome.is_ok());
    }
}