term_guard/analyzers/
types.rs

1//! Types for analyzer metrics and values.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::fmt;
6
/// A single metric result produced by an analyzer.
///
/// The variants range from plain scalars (`Double`, `Long`, `String`,
/// `Boolean`) to composite values (`Histogram`, `Vector`, `Map`).
///
/// The serde attributes below select the adjacently tagged representation:
/// the variant name is stored under the `type` key and the variant payload
/// under the `value` key.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
pub enum MetricValue {
    /// A floating-point metric value (e.g., mean, percentage).
    Double(f64),

    /// A signed 64-bit integer metric value (e.g., count, size).
    Long(i64),

    /// A histogram distribution of values, carried as a `MetricDistribution`.
    Histogram(MetricDistribution),

    /// A vector of `f64` values (e.g., for multi-dimensional metrics).
    Vector(Vec<f64>),

    /// A string metric value (e.g., mode, most frequent value).
    String(String),

    /// A boolean metric value (e.g., presence/absence).
    Boolean(bool),

    /// A map of string keys to metric values (e.g., grouped metrics).
    ///
    /// The values are themselves `MetricValue`s, so maps can be nested.
    Map(HashMap<String, MetricValue>),
}
35
36impl MetricValue {
37    /// Checks if the metric value is numeric (Double or Long).
38    pub fn is_numeric(&self) -> bool {
39        matches!(self, MetricValue::Double(_) | MetricValue::Long(_))
40    }
41
42    /// Attempts to get the numeric value as f64.
43    pub fn as_f64(&self) -> Option<f64> {
44        match self {
45            MetricValue::Double(v) => Some(*v),
46            MetricValue::Long(v) => Some(*v as f64),
47            _ => None,
48        }
49    }
50
51    /// Attempts to get the value as i64.
52    pub fn as_i64(&self) -> Option<i64> {
53        match self {
54            MetricValue::Long(v) => Some(*v),
55            MetricValue::Double(v) => {
56                if v.fract() == 0.0 {
57                    Some(*v as i64)
58                } else {
59                    None
60                }
61            }
62            _ => None,
63        }
64    }
65
66    /// Returns a human-readable string representation of the metric value.
67    pub fn to_string_pretty(&self) -> String {
68        match self {
69            MetricValue::Double(v) => {
70                if v.fract() == 0.0 {
71                    format!("{v:.0}")
72                } else {
73                    format!("{v:.4}")
74                }
75            }
76            MetricValue::Long(v) => v.to_string(),
77            MetricValue::String(s) => s.clone(),
78            MetricValue::Boolean(b) => b.to_string(),
79            MetricValue::Histogram(h) => format!("Histogram({} buckets)", h.buckets.len()),
80            MetricValue::Vector(v) => format!("Vector({} elements)", v.len()),
81            MetricValue::Map(m) => format!("Map({} entries)", m.len()),
82        }
83    }
84}
85
86impl fmt::Display for MetricValue {
87    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88        write!(f, "{}", self.to_string_pretty())
89    }
90}
91
/// Represents a histogram distribution of values.
///
/// Used for metrics that capture value distributions rather than single values.
/// The summary statistics are optional: `new` and `from_buckets` leave them
/// as `None`, and `with_stats` fills in all four at once.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MetricDistribution {
    /// The histogram buckets with their boundaries and counts.
    pub buckets: Vec<HistogramBucket>,

    /// Total count of values in the distribution.
    ///
    /// `from_buckets` computes this as the sum of the bucket counts.
    pub total_count: u64,

    /// Minimum value in the distribution, if known.
    pub min: Option<f64>,

    /// Maximum value in the distribution, if known.
    pub max: Option<f64>,

    /// Mean value of the distribution, if known.
    pub mean: Option<f64>,

    /// Standard deviation of the distribution, if known.
    pub std_dev: Option<f64>,
}
115
116impl MetricDistribution {
117    /// Creates a new empty distribution.
118    pub fn new() -> Self {
119        Self {
120            buckets: Vec::new(),
121            total_count: 0,
122            min: None,
123            max: None,
124            mean: None,
125            std_dev: None,
126        }
127    }
128
129    /// Creates a distribution from a set of buckets.
130    pub fn from_buckets(buckets: Vec<HistogramBucket>) -> Self {
131        let total_count = buckets.iter().map(|b| b.count).sum();
132        Self {
133            buckets,
134            total_count,
135            min: None,
136            max: None,
137            mean: None,
138            std_dev: None,
139        }
140    }
141
142    /// Adds statistical summary information to the distribution.
143    pub fn with_stats(mut self, min: f64, max: f64, mean: f64, std_dev: f64) -> Self {
144        self.min = Some(min);
145        self.max = Some(max);
146        self.mean = Some(mean);
147        self.std_dev = Some(std_dev);
148        self
149    }
150}
151
152impl Default for MetricDistribution {
153    fn default() -> Self {
154        Self::new()
155    }
156}
157
/// Represents a single bucket in a histogram.
///
/// A bucket is described by two `f64` bounds and the number of values that
/// were counted in it. All fields are public and `HistogramBucket::new`
/// stores its arguments as given, so the ordering of the bounds is not
/// validated by this type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct HistogramBucket {
    /// Lower bound of the bucket (inclusive).
    pub lower_bound: f64,

    /// Upper bound of the bucket (exclusive).
    pub upper_bound: f64,

    /// Count of values in this bucket.
    pub count: u64,
}
170
171impl HistogramBucket {
172    /// Creates a new histogram bucket.
173    pub fn new(lower_bound: f64, upper_bound: f64, count: u64) -> Self {
174        Self {
175            lower_bound,
176            upper_bound,
177            count,
178        }
179    }
180
181    /// Returns the width of the bucket.
182    pub fn width(&self) -> f64 {
183        self.upper_bound - self.lower_bound
184    }
185
186    /// Returns the midpoint of the bucket.
187    pub fn midpoint(&self) -> f64 {
188        (self.lower_bound + self.upper_bound) / 2.0
189    }
190}
191
/// Marker trait for analyzer-specific metric types.
///
/// The trait declares no methods of its own. Implementing it requires the
/// type to be convertible into the standard `MetricValue` enum
/// (`Into<MetricValue>`), to be `Send + Sync`, and to implement `fmt::Debug`.
/// This allows analyzers to define their own metric types while ensuring
/// they can be converted to `MetricValue`.
pub trait AnalyzerMetric: Into<MetricValue> + Send + Sync + fmt::Debug {}
197
/// `MetricValue` is itself an `AnalyzerMetric`.
///
/// This is a single concrete implementation, not a blanket one; the
/// `Into<MetricValue>` bound is met by the standard library's reflexive
/// conversion of a type into itself.
impl AnalyzerMetric for MetricValue {}
200
201/// Implementation for f64 values.
202impl From<f64> for MetricValue {
203    fn from(value: f64) -> Self {
204        MetricValue::Double(value)
205    }
206}
207
208/// Implementation for i64 values.
209impl From<i64> for MetricValue {
210    fn from(value: i64) -> Self {
211        MetricValue::Long(value)
212    }
213}
214
215/// Implementation for bool values.
216impl From<bool> for MetricValue {
217    fn from(value: bool) -> Self {
218        MetricValue::Boolean(value)
219    }
220}
221
222/// Implementation for String values.
223impl From<String> for MetricValue {
224    fn from(value: String) -> Self {
225        MetricValue::String(value)
226    }
227}
228
229/// Implementation for &str values.
230impl From<&str> for MetricValue {
231    fn from(value: &str) -> Self {
232        MetricValue::String(value.to_string())
233    }
234}