bothan_lib/registry/processor/
median.rs

//! Median processor for combining source data values.
2//!
3//! This module provides a processor that calculates the median of multiple input values.
4//! Median calculation provides a robust measure of central tendency that is less affected
5//! by outliers compared to the arithmetic mean.
6//!
7//! The module provides:
8//!
9//! - The [`MedianProcessor`] struct which implements median calculation
10//! - Support for a minimum source count requirement
11//! - Proper handling of both odd and even number of data points
12//!
13//! # Median Calculation
14//!
15//! The median calculation follows these rules:
16//!
//! - For an odd number of values, the median is the middle value after sorting
//! - For an even number of values, the median is the average of the two middle values
19//! - Processing fails if fewer than the required minimum sources are available
20
21use std::cmp::max;
22use std::ops::{Add, Div, Sub};
23
24use bincode::{Decode, Encode};
25use num_traits::FromPrimitive;
26use rust_decimal::Decimal;
27use serde::{Deserialize, Serialize};
28
29use crate::registry::processor::ProcessError;
30
31/// Processor that calculates the median of input values.
32///
33/// The `MedianProcessor` computes the median of a set of input values, requiring a minimum
34/// number of data sources to be present. This provides a robust measure of central tendency
35/// that is resistant to outliers in the data.
36///
37/// # Minimum Source Count
38///
39/// The processor requires at least `min_source_count` data points to calculate a median.
40/// If fewer data points are provided, processing will fail with an error.
41///
42/// # Median Calculation
43///
44/// - For odd number of values, returns the middle value after sorting
45/// - For even number of values, returns the average of the two middle values
46///
47/// # Examples
48///
49/// ```
50/// use bothan_lib::registry::processor::{Processor, median::MedianProcessor};
51/// use rust_decimal::Decimal;
52///
53/// // Create a median processor that requires at least 3 sources
54/// let processor = Processor::Median(MedianProcessor { min_source_count: 3 });
55///
56/// // Process some data with 5 values
57/// let data = vec![
58///     ("source1".to_string(), Decimal::new(100, 0)),
59///     ("source2".to_string(), Decimal::new(200, 0)),
60///     ("source3".to_string(), Decimal::new(300, 0)),
61///     ("source4".to_string(), Decimal::new(400, 0)),
62///     ("source5".to_string(), Decimal::new(500, 0)),
63/// ];
64///
65/// let result = processor.process(data).unwrap();
66/// assert_eq!(result, Decimal::new(300, 0)); // Median is 300
67/// ```
68#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Encode, Decode)]
69pub struct MedianProcessor {
70    /// The minimum number of sources required to calculate the median.
71    ///
72    /// If fewer than this number of data points are provided, the processing
73    /// will fail with an error.
74    pub min_source_count: usize,
75}
76
77impl MedianProcessor {
78    /// Creates a new `MedianProcessor` with the specified minimum source count.
79    ///
80    /// The minimum source count determines how many data points are required at minimum
81    /// to calculate a valid median. This helps ensure the result has sufficient statistical
82    /// significance.
83    ///
84    /// # Examples
85    ///
86    /// ```
87    /// use bothan_lib::registry::processor::median::MedianProcessor;
88    ///
89    /// // Create a processor that requires at least 3 sources
90    /// let processor = MedianProcessor::new(3);
91    /// ```
92    pub fn new(min_source_count: usize) -> Self {
93        MedianProcessor { min_source_count }
94    }
95
96    /// Calculates the median of the provided decimal values.
97    ///
98    /// This method computes the median of the input values, failing if there are fewer
99    /// data points than the required minimum.
100    ///
101    /// # Errors
102    ///
103    /// Returns a `ProcessError` if there are fewer than `min_source_count` data points.
104    pub fn process(&self, data: Vec<Decimal>) -> Result<Decimal, ProcessError> {
105        if data.len() < max(self.min_source_count, 1) {
106            Err(ProcessError::new("Not enough sources to calculate median"))
107        } else {
108            Ok(median(data))
109        }
110    }
111}
112
113/// Calculates the median of a vector of values.
114///
115/// This function computes the median of the provided values:
116/// - For an odd number of elements, returns the middle element after sorting
117/// - For an even number of elements, returns the average of the two middle elements
118///
119/// The function is generic and works with any type that supports the required operations.
120fn median<T>(mut data: Vec<T>) -> T
121where
122    T: Ord + Copy + FromPrimitive + Add<Output = T> + Div<Output = T> + Sub<Output = T>,
123{
124    data.sort();
125    let mid = data.len() / 2;
126    if data.len() % 2 == 0 {
127        let b = data.swap_remove(mid);
128        let a = data.swap_remove(mid - 1);
129        ((b - a) / T::from_u8(2).unwrap()) + a
130    } else {
131        data.swap_remove(mid)
132    }
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts plain integers into the `Decimal` inputs expected by `process`.
    fn decimals(values: &[i64]) -> Vec<Decimal> {
        values.iter().copied().map(Decimal::from).collect()
    }

    #[test]
    fn test_median_with_odd_count() {
        let processor = MedianProcessor::new(1);
        let data = decimals(&[510, 202, 10, 4300, 501]);

        let res = processor.process(data);

        assert_eq!(res.unwrap(), Decimal::from(501));
    }

    #[test]
    fn test_median_with_even_count() {
        let processor = MedianProcessor::new(1);
        let data = decimals(&[101000878, 210, 333120, 4000, 50120, 50122]);

        let res = processor.process(data);

        assert_eq!(res.unwrap(), Decimal::from(50121));
    }

    #[test]
    fn test_median_with_single_value() {
        let processor = MedianProcessor::new(1);

        let res = processor.process(decimals(&[42]));

        assert_eq!(res.unwrap(), Decimal::from(42));
    }

    #[test]
    fn test_median_with_exactly_min_source_count() {
        // Two values satisfy a minimum of two; the result is their midpoint.
        let processor = MedianProcessor::new(2);

        let res = processor.process(decimals(&[10, 20]));

        assert_eq!(res.unwrap(), Decimal::from(15));
    }

    #[test]
    fn test_median_with_too_few_sources() {
        let processor = MedianProcessor::new(3);

        let res = processor.process(decimals(&[1, 2]));

        assert!(res.is_err());
    }

    #[test]
    fn test_median_with_empty_input_and_zero_minimum() {
        // The threshold is floored at one, so empty input is an error even
        // when no minimum was requested.
        let processor = MedianProcessor::new(0);

        let res = processor.process(Vec::new());

        assert!(res.is_err());
    }
}