substrate_median/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3#![cfg_attr(not(feature = "std"), no_std)]
4#![deny(missing_docs)]
5
6use core::cmp::Ordering;
7
8use scale::{EncodeLike, FullCodec};
9use frame_support::storage::*;
10
11mod lexicographic;
12pub use lexicographic::*;
13
14mod average;
15pub use average::*;
16
17mod policy;
18pub use policy::*;
19
20/// The store for a median.
21///
22/// `KeyPrefix` is accepted so a single set of storage values may be used to back multiple medians.
23/// For all `StorageDoubleMap`s however, the hasher of the second key MUST be the identity hasher.
24///
25/// For all storage values present, they MUST be considered opaque to the caller and left
26/// undisturbed. No assumptions may be made about their internal representation nor usage.
27/// Any names or documentation comments are solely for the review of the implementation
28/// itself, and are not intended to signify any potential layout nor use cases to the caller.
29/// ANY external usage has undefined behavior.
30pub trait MedianStore<KeyPrefix: FullCodec, MedianValue: Average + LexicographicEncoding> {
31  /// The policy to use when there are multiple candidate values.
32  const POLICY: Policy;
33
34  /// The amount of items currently present within the median's list.
35  type Length: StorageMap<KeyPrefix, u64, Query = u64>;
36
37  /// A store for the values currently present within the median.
38  ///
39  /// The value is the amount of instances of this value within the median's list.
40  type Store: IterableStorageDoubleMap<KeyPrefix, MedianValue::Encoding, u64, Query = u64>;
41
42  /// A secondary store for the values currently present within the median.
43  type ReverseStore: IterableStorageDoubleMap<
44    KeyPrefix,
45    LexicographicReverse<MedianValue>,
46    (),
47    Query = (),
48  >;
49
50  /// The position of the saved median within the list of values.
51  ///
52  /// This is necessary as when a value selected as the current median is present multiple times
53  /// within the list of values, the code does not know _which_ instance was selected as the
54  /// median, as necessary to know when to advance to a lesser/greater value. To resolve this, once
55  /// we know a value is the median value, we always set the position to the _first instance_ of
56  /// the value. This gives us a consistent frame of reference to decide the next steps of the
57  /// algorithm upon.
58  type Position: StorageMap<KeyPrefix, u64, Query = Option<u64>>;
59
60  /// The median value.
61  ///
62  /// This may drift from the actual median while an update is performed.
63  type Median: StorageMap<KeyPrefix, MedianValue, Query = Option<MedianValue>>;
64}
65
66const KEY_PREFIX_ASSERT: &str = "next value in storage had a different prefix associated";
67const AFTER_ASSERT: &str = "iterator yielding *after* key yielded key itself";
68
69/// Update the median.
70///
71/// This function may be called at any point to correctly calculate the current median. It will do
72/// so in an amount of operations linear to the distance from the stored median to the new median.
73///
74/// Since the distance is bounded by the amount of insertions to/removals from the median's list
75/// which have yet to be handled, the following `push` and `pop` functions achieve a constant
76/// amount of operations by calling this function _upon each and every invocation_. This leaves
77/// solely a singular insertion/removal needing to be handled, and a maximum distance of one.
78fn update_median<
79  KeyPrefix: FullCodec,
80  MedianValue: Average + LexicographicEncoding,
81  S: MedianStore<KeyPrefix, MedianValue>,
82>(
83  key_prefix: impl Copy + EncodeLike<KeyPrefix>,
84) {
85  let Some(mut existing_median_pos) = S::Position::get(key_prefix) else {
86    return;
87  };
88  let length = S::Length::get(key_prefix);
89  let target_median_pos = S::POLICY.target_median_pos(length);
90
91  let mut existing_median =
92    S::Median::get(key_prefix).expect("current position yet not current median");
93
94  // We first iterate up to the desired median position
95  {
96    let mut iter = {
97      let existing_median_key =
98        S::Store::hashed_key_for(key_prefix, existing_median.lexicographic_encode());
99      S::Store::iter_from(existing_median_key)
100    };
101
102    let mut existing_median_instances =
103      S::Store::get(key_prefix, existing_median.lexicographic_encode());
104    let mut next_value_first_pos;
105    while {
106      next_value_first_pos = existing_median_pos + existing_median_instances;
107      next_value_first_pos <= target_median_pos
108    } {
109      existing_median_pos = next_value_first_pos;
110      let (_key_prefix, next_value_encoding, next_value_instances) = iter
111        .next()
112        .expect("stored median was before the actual median yet no values were after it");
113      debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}");
114      debug_assert!(
115        existing_median.lexicographic_encode() != next_value_encoding,
116        "{AFTER_ASSERT}",
117      );
118      existing_median = MedianValue::lexicographic_decode(next_value_encoding);
119      existing_median_instances = next_value_instances;
120    }
121  }
122
123  // Then, we iterate down to the desired median position
124  {
125    let mut iter = {
126      let existing_median_key =
127        S::ReverseStore::hashed_key_for(key_prefix, LexicographicReverse::from(&existing_median));
128      S::ReverseStore::iter_keys_from(existing_median_key)
129    };
130
131    while existing_median_pos > target_median_pos {
132      let (_key_prefix, prior_value_encoding) = iter
133        .next()
134        .expect("stored median was before the actual median yet no values were after it");
135      debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}");
136      let prior_value = prior_value_encoding.into();
137      debug_assert!(prior_value != existing_median, "{AFTER_ASSERT}");
138      let prior_value_instances = S::Store::get(key_prefix, prior_value.lexicographic_encode());
139      existing_median = prior_value;
140      existing_median_pos -= prior_value_instances;
141    }
142  }
143
144  S::Position::set(key_prefix, Some(existing_median_pos));
145  S::Median::set(key_prefix, Some(existing_median));
146}
147
148/// A median.
149///
150/// The implementation only uses a constant amount of database operations to implement insertion
151/// and removal. When instantiated over a database with logarithmic complexities (such as a radix
152/// trie), this effects a median with logarithmic memory/computation complexities (not requiring
153/// loading all values into memory).
154///
155/// This SHOULD NOT be used for small collections where the linear (or even quadratic) complexities
156/// still out-perform how expensive database operations are. In those cases, the collection should
157/// be written to a single storage slot, read entirely, sorted, and the median should be
158/// immediately taken via indexing the value halfway through the collection.
159pub trait Median<KeyPrefix: FullCodec, MedianValue: Average + LexicographicEncoding>:
160  MedianStore<KeyPrefix, MedianValue>
161{
162  /// The current length of the median's list.
163  fn length(key_prefix: impl Copy + EncodeLike<KeyPrefix>) -> u64;
164
165  /// The current median value.
166  ///
167  /// This returns `None` if no values are present.
168  fn median(key_prefix: impl Copy + EncodeLike<KeyPrefix>) -> Option<MedianValue>;
169
170  /// Push a new value onto the median.
171  ///
172  /// If the value is already present within the existing values, the amount of times it will be
173  /// considered present will be incremented.
174  fn push(key_prefix: impl Copy + EncodeLike<KeyPrefix>, value: MedianValue);
175
176  /// Pop a value from the median.
177  ///
178  /// This returns `true` if the value was present and `false` otherwise.
179  ///
180  /// If the value is present within the existing values multiple times, only a single instance
181  /// will be removed.
182  fn pop(key_prefix: impl Copy + EncodeLike<KeyPrefix>, value: MedianValue) -> bool;
183}
184
185impl<
186    KeyPrefix: FullCodec,
187    MedianValue: Average + LexicographicEncoding,
188    S: MedianStore<KeyPrefix, MedianValue>,
189  > Median<KeyPrefix, MedianValue> for S
190{
191  fn length(key_prefix: impl Copy + EncodeLike<KeyPrefix>) -> u64 {
192    Self::Length::get(key_prefix)
193  }
194
195  /*
196    This function assumes `Position`, `Median` are up to date. This is guaranteed by
197    `update_median` being called after every single `push`, `pop` call, the only defined ways to
198    mutate the state.
199  */
200  fn median(key_prefix: impl Copy + EncodeLike<KeyPrefix>) -> Option<MedianValue> {
201    let mut current_median = Self::Median::get(key_prefix)?;
202    // If we're supposed to take the average, do so now
203    if matches!(S::POLICY, Policy::Average) {
204      let length = Self::length(key_prefix);
205      if (length % 2) == 0 {
206        // This will yield the target position for the lesser value in the pair
207        let target_median_pos_lo = Self::POLICY.target_median_pos(length);
208        let target_median_pos_hi = target_median_pos_lo + 1;
209
210        /*
211          We need to take the average of the current value and the next value, due to
212          `Policy::Average` internally being considered `Policy::Lesser` and solely differing here
213          when the median is fetched.
214
215          To fetch the next value, we first need to identify if `target_median_pos` points to the
216          _last instance_ of the currently selected median value. If it does not, then the next
217          value is another instance of this value, the average of them themselves, and we can
218          return now.
219
220          If `target_median_pos` does point to the last instance of the currently selected median
221          value, then we fetch the next key in our trie to learn the next value in order to take the
222          average.
223        */
224        let current_median_pos =
225          Self::Position::get(key_prefix).expect("current median yet no position");
226        let current_median_encoding = current_median.lexicographic_encode();
227        let inclusions = Self::Store::get(key_prefix, &current_median_encoding);
228        let start_pos_of_next_value = current_median_pos + inclusions;
229
230        // Short-circuit if we are averaging two of the same value
231        if target_median_pos_hi < start_pos_of_next_value {
232          return Some(current_median);
233        }
234
235        let current_median_key = Self::Store::hashed_key_for(key_prefix, &current_median_encoding);
236        let (_key_prefix, next_encoding) = Self::Store::iter_keys_from(current_median_key)
237          .next()
238          .expect("last value in storage yet looking for value after it");
239        debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}");
240        debug_assert!(current_median_encoding != next_encoding, "{AFTER_ASSERT}");
241        let next_value = MedianValue::lexicographic_decode(next_encoding);
242
243        current_median = MedianValue::average(current_median, next_value);
244      }
245    }
246    Some(current_median)
247  }
248
249  fn push(key_prefix: impl Copy + EncodeLike<KeyPrefix>, value: MedianValue) {
250    // Update the length
251    let existing_length = Self::Length::get(key_prefix);
252    let new_length = existing_length + 1;
253    Self::Length::set(key_prefix, new_length);
254
255    // Update the amount of inclusions
256    let encoding = value.lexicographic_encode();
257    {
258      let existing_presences = Self::Store::get(key_prefix, &encoding);
259      let new_presences = existing_presences + 1;
260      Self::Store::set(key_prefix, &encoding, new_presences);
261      if existing_presences == 0 {
262        Self::ReverseStore::set(key_prefix, LexicographicReverse::from_encoding(encoding), ());
263      }
264    }
265
266    // If this was the first value inserted, initialize and immediately return
267    if existing_length == 0 {
268      Self::Position::set(key_prefix, Some(0));
269      Self::Median::set(key_prefix, Some(value));
270      return;
271    }
272
273    // Fetch the current median
274    let existing_median =
275      Self::Median::get(key_prefix).expect("values within median yet no median");
276
277    // If this value was inserted before the current median, the current median's position has
278    // increased
279    if value < existing_median {
280      let mut existing_median_pos =
281        Self::Position::get(key_prefix).expect("values within median yet no current position");
282      existing_median_pos += 1;
283      Self::Position::set(key_prefix, Some(existing_median_pos));
284    }
285
286    // Update the median
287    update_median::<_, _, Self>(key_prefix);
288  }
289
290  fn pop(key_prefix: impl Copy + EncodeLike<KeyPrefix>, value: MedianValue) -> bool {
291    let encoding = value.lexicographic_encode();
292    let mut inclusions = Self::Store::get(key_prefix, &encoding);
293    if inclusions == 0 {
294      return false;
295    }
296
297    // Update the length
298    let existing_length = Self::Length::get(key_prefix);
299    let new_length = existing_length - 1;
300    Self::Length::set(key_prefix, new_length);
301
302    // Update the presence within the median's list
303    inclusions -= 1;
304    if inclusions == 0 {
305      Self::Store::remove(key_prefix, &encoding);
306      Self::ReverseStore::remove(key_prefix, LexicographicReverse::from_encoding(encoding));
307    } else {
308      Self::Store::set(key_prefix, encoding, inclusions);
309    }
310
311    let existing_median =
312      Self::Median::get(key_prefix).expect("values within median yet no median");
313    match value.cmp(&existing_median) {
314      Ordering::Less => {
315        let mut existing_median_pos =
316          Self::Position::get(key_prefix).expect("values within median yet no current position");
317        existing_median_pos -= 1;
318        Self::Position::set(key_prefix, Some(existing_median_pos));
319      }
320
321      Ordering::Equal if inclusions == 0 => {
322        /*
323          This value was the median, then removed, leaving `Median` and `Position` in an
324          ill-defined state. We attempt to consider `Position` as well-defined and solely update
325          `Median` to also be well-defined.
326
327          This works so long `Position` still refers to a valid position within the median's list.
328          It may not if the median's list started with length 1 or 2, where the current position
329          could have referred to the last element in the list, now popped.
330
331          If the length was 1, the list is now empty, triggering its own special case.
332
333          If the length was 2, we create a well-defined (and also accurate) definition for
334          `Position` and `Median` by setting them to the first (and only) item within
335          the list.
336        */
337        if new_length == 0 {
338          Self::Position::remove(key_prefix);
339          Self::Median::remove(key_prefix);
340        } else {
341          let mut existing_median_pos =
342            Self::Position::get(key_prefix).expect("values within median yet no current position");
343
344          let new_median_encoding = if existing_median_pos >= new_length {
345            /*
346              While resetting the declared median to the first item is always safe, so long as
347              `update_median` is called after (as done here), `update_median` has an algorithmic
348              complexity linear to the distance from the declared median to the correct median.
349              That means this can only be done, while maintaining the desired complexities, when a
350              bound is known on the distance from `0` to `target_median_pos`.
351
352              Since the list length is 1 in this case, per the reasoning above, the distance here
353              is `0`, making this a safe operation which also respects the desired complexities.
354            */
355            Self::Position::set(key_prefix, Some(0));
356            Self::Store::iter_key_prefix(key_prefix)
357              .next()
358              .expect("median list isn't empty yet has no values")
359          } else {
360            let existing_median_key =
361              Self::Store::hashed_key_for(key_prefix, existing_median.lexicographic_encode());
362            let (_key_prefix, next_value_encoding) =
363              Self::Store::iter_keys_from(existing_median_key)
364                .next()
365                .expect("current median wasn't the last value yet no value was after");
366            debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}");
367            debug_assert!(
368              existing_median.lexicographic_encode() != next_value_encoding,
369              "{AFTER_ASSERT}",
370            );
371            next_value_encoding
372          };
373
374          Self::Median::set(
375            key_prefix,
376            Some(MedianValue::lexicographic_decode(new_median_encoding)),
377          );
378        }
379      }
380
381      /*
382        If this value is an instance of the current median, for which some remain, we consider this
383        as removing an instance other than the first instance which is what the position refers to.
384        Accordingly, we don't have to update the position.
385
386        If this is greater than the current median, then its removal does not effect the position
387        of the current median.
388      */
389      Ordering::Equal | Ordering::Greater => {}
390    }
391
392    // Update the median
393    update_median::<_, _, Self>(key_prefix);
394
395    true
396  }
397}
398
399#[test]
400fn test_median() {
401  use frame_support::{
402    Blake2_128Concat, Identity,
403    storage::types::{self, ValueQuery, OptionQuery},
404  };
405
406  use rand_core::{RngCore, OsRng};
407
408  macro_rules! prefix {
409    ($name: ident, $prefix: expr) => {
410      struct $name;
411      impl frame_support::traits::StorageInstance for $name {
412        const STORAGE_PREFIX: &'static str = $prefix;
413        fn pallet_prefix() -> &'static str {
414          "median"
415        }
416      }
417    };
418  }
419  prefix!(PrefixLength, "Length");
420  prefix!(PrefixStore, "Store");
421  prefix!(PrefixReverse, "Reverse");
422  prefix!(PrefixPosition, "Position");
423  prefix!(PrefixMedian, "Median");
424
425  type StorageMapStruct<Prefix, Value, Query> =
426    types::StorageMap<Prefix, Blake2_128Concat, (), Value, Query>;
427  type StorageDoubleMapStruct<Prefix, Key, Value> =
428    types::StorageDoubleMap<Prefix, Blake2_128Concat, (), Identity, Key, Value, ValueQuery>;
429
430  macro_rules! test {
431    ($name: ident, $policy: expr) => {
432      struct $name;
433      impl MedianStore<(), u32> for $name {
434        const POLICY: Policy = $policy;
435        type Length = StorageMapStruct<PrefixLength, u64, ValueQuery>;
436        type Store =
437          StorageDoubleMapStruct<PrefixStore, <u32 as LexicographicEncoding>::Encoding, u64>;
438        type ReverseStore = StorageDoubleMapStruct<PrefixReverse, LexicographicReverse<u32>, ()>;
439        type Position = StorageMapStruct<PrefixPosition, u64, OptionQuery>;
440        type Median = StorageMapStruct<PrefixMedian, u32, OptionQuery>;
441      }
442
443      sp_io::TestExternalities::default().execute_with(|| {
444        assert_eq!($name::length(()), 0);
445        assert_eq!($name::median(()), None);
446
447        let mut current_list = vec![];
448        for i in 0 .. 1000 {
449          'reselect: loop {
450            // This chooses a modulus low enough this `match` will in fact match, yet high enough
451            // more cases can be added without forgetting to update it being an issue
452            match OsRng.next_u64() % 8 {
453              // Push a freshly sampled value
454              0 => {
455                #[allow(clippy::cast_possible_truncation)]
456                let push = OsRng.next_u64() as u32;
457                current_list.push(push);
458                current_list.sort();
459                $name::push((), push);
460              }
461              // Push an existing value
462              1 if !current_list.is_empty() => {
463                let i =
464                  usize::try_from(OsRng.next_u64() % u64::try_from(current_list.len()).unwrap())
465                    .unwrap();
466                let push = current_list[i];
467                current_list.push(push);
468                current_list.sort();
469                $name::push((), push);
470              }
471              // Remove an existing value
472              2 if !current_list.is_empty() => {
473                let i =
474                  usize::try_from(OsRng.next_u64() % u64::try_from(current_list.len()).unwrap())
475                    .unwrap();
476                let pop = current_list.remove(i);
477                assert!($name::pop((), pop));
478              }
479              // Remove a value which is not present
480              3 => {
481                #[allow(clippy::cast_possible_truncation)]
482                let pop = OsRng.next_u64() as u32;
483                if current_list.contains(&pop) {
484                  continue 'reselect;
485                }
486                assert!(!$name::pop((), pop));
487              }
488              _ => continue 'reselect,
489            }
490            break 'reselect;
491          }
492
493          assert_eq!(
494            $name::length(()),
495            u64::try_from(current_list.len()).unwrap(),
496            "length differs on iteration: {i}",
497          );
498          let target_median_pos =
499            $policy.target_median_pos(u64::try_from(current_list.len()).unwrap());
500          let target_median_pos = usize::try_from(target_median_pos).unwrap();
501          let expected = (!current_list.is_empty()).then(|| match $policy {
502            Policy::Greater | Policy::Lesser => current_list[target_median_pos],
503            Policy::Average => {
504              if (current_list.len() % 2) == 0 {
505                u32::average(current_list[target_median_pos], current_list[target_median_pos + 1])
506              } else {
507                current_list[target_median_pos]
508              }
509            }
510          });
511          assert_eq!($name::median(()), expected, "median differs on iteration: {i}");
512        }
513      });
514    };
515  }
516
517  test!(Greater, Policy::Greater);
518  test!(Lesser, Policy::Lesser);
519  test!(Average, Policy::Average);
520}