Skip to main content

anomalyx_core/
value.rs

1//! The normalized scalar type. Every input format collapses into these cells.
2
3use serde::{Deserialize, Serialize};
4use std::cmp::Ordering;
5
6/// A single normalized cell value.
7///
8/// Arbitrary corpora (CSV, JSON, NDJSON, logs, Parquet, …) are reduced to a
9/// columnar grid of these. The variant set is intentionally small: detectors
10/// reason over a closed world, and "honest absence" is represented explicitly
11/// by [`Value::Null`] rather than by a sentinel number or empty string.
12#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
13#[serde(tag = "t", content = "v", rename_all = "lowercase")]
14pub enum Value {
15    Null,
16    Bool(bool),
17    Int(i64),
18    Float(f64),
19    Str(String),
20}
21
22impl Value {
23    /// The column type this value contributes.
24    pub fn col_type(&self) -> ColType {
25        match self {
26            Value::Null => ColType::Unknown,
27            Value::Bool(_) => ColType::Bool,
28            Value::Int(_) => ColType::Int,
29            Value::Float(_) => ColType::Float,
30            Value::Str(_) => ColType::Str,
31        }
32    }
33
34    /// Numeric projection used by statistical detectors.
35    ///
36    /// `Int` and `Float` map to their `f64` value; `Bool` maps to `0.0`/`1.0`;
37    /// `Null` and `Str` are non-numeric and return `None`. Honest absence: a
38    /// `Null` never becomes a `0.0` that would skew a mean.
39    pub fn as_f64(&self) -> Option<f64> {
40        match self {
41            Value::Int(i) => Some(*i as f64),
42            Value::Float(f) => Some(*f),
43            Value::Bool(b) => Some(if *b { 1.0 } else { 0.0 }),
44            Value::Null | Value::Str(_) => None,
45        }
46    }
47
48    pub fn is_null(&self) -> bool {
49        matches!(self, Value::Null)
50    }
51
52    /// Canonical string form, used for categorical/frequency detectors and as
53    /// the basis for stable evidence handles.
54    pub fn canonical(&self) -> String {
55        match self {
56            Value::Null => "\u{0}null".to_string(),
57            Value::Bool(b) => format!("b:{b}"),
58            Value::Int(i) => format!("i:{i}"),
59            Value::Float(f) => format!("f:{:?}", f),
60            Value::Str(s) => format!("s:{s}"),
61        }
62    }
63}
64
65/// A total order over values, so detector output (and thus the envelope) is
66/// deterministic regardless of input ordering. Cross-variant ties break on a
67/// fixed variant rank; floats use [`f64::total_cmp`] so NaN has a defined seat.
68impl Value {
69    pub fn total_cmp(&self, other: &Value) -> Ordering {
70        fn rank(v: &Value) -> u8 {
71            match v {
72                Value::Null => 0,
73                Value::Bool(_) => 1,
74                Value::Int(_) => 2,
75                Value::Float(_) => 3,
76                Value::Str(_) => 4,
77            }
78        }
79        match (self, other) {
80            (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
81            (Value::Int(a), Value::Int(b)) => a.cmp(b),
82            (Value::Float(a), Value::Float(b)) => a.total_cmp(b),
83            (Value::Str(a), Value::Str(b)) => a.cmp(b),
84            _ => rank(self).cmp(&rank(other)),
85        }
86    }
87}
88
89/// The inferred logical type of a column.
90#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
91#[serde(rename_all = "lowercase")]
92pub enum ColType {
93    /// No non-null value has been observed yet.
94    Unknown,
95    Bool,
96    Int,
97    Float,
98    Str,
99    /// Conflicting concrete types observed in the same column.
100    Mixed,
101}
102
103impl ColType {
104    /// Least-upper-bound when folding cell types into a column type.
105    ///
106    /// `Unknown` is the identity; `Int`+`Float` widen to `Float`; any other
107    /// disagreement is `Mixed` (itself a structural anomaly signal, not an error).
108    pub fn unify(self, other: ColType) -> ColType {
109        use ColType::*;
110        match (self, other) {
111            (Unknown, x) | (x, Unknown) => x,
112            (a, b) if a == b => a,
113            (Int, Float) | (Float, Int) => Float,
114            _ => Mixed,
115        }
116    }
117
118    pub fn is_numeric(self) -> bool {
119        matches!(self, ColType::Int | ColType::Float | ColType::Bool)
120    }
121}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// An absent cell must never be read as a number.
    #[test]
    fn null_is_not_numeric_zero() {
        let projected = Value::Null.as_f64();
        assert_eq!(projected, None);
    }

    /// Each variant renders with its own prefix, and `Null` cannot collide
    /// with the literal text "null".
    #[test]
    fn canonical_forms_are_exact_and_disjoint() {
        let expectations = [
            (Value::Bool(true), "b:true"),
            (Value::Int(7), "i:7"),
            (Value::Float(1.5), "f:1.5"),
            (Value::Str(String::from("x")), "s:x"),
        ];
        for (value, rendered) in &expectations {
            assert_eq!(value.canonical(), *rendered);
        }

        // null is distinct from the string "null"
        let null_form = Value::Null.canonical();
        let text_form = Value::Str(String::from("null")).canonical();
        assert_ne!(null_form, text_form);
    }

    /// Same-variant comparisons follow the payload's natural order.
    #[test]
    fn total_cmp_orders_within_variant() {
        let ascending_pairs = [
            (Value::Bool(false), Value::Bool(true)),
            (Value::Int(1), Value::Int(2)),
            (Value::Float(1.0), Value::Float(2.0)),
            (Value::Str(String::from("a")), Value::Str(String::from("b"))),
        ];
        for (smaller, larger) in &ascending_pairs {
            assert_eq!(smaller.total_cmp(larger), Ordering::Less);
        }
    }

    /// Different variants are ordered purely by their rank.
    #[test]
    fn total_cmp_orders_across_variants_by_rank() {
        // Null < Bool < Int < Float < Str
        let ladder = [
            Value::Null,
            Value::Bool(true),
            Value::Int(0),
            Value::Float(0.0),
            Value::Str(String::new()),
        ];
        for (i, lhs) in ladder.iter().enumerate() {
            for (j, rhs) in ladder.iter().enumerate() {
                assert_eq!(lhs.total_cmp(rhs), i.cmp(&j), "i={i} j={j}");
            }
        }
    }

    /// `Int`, `Float` and `Bool` are numeric; everything else is not.
    #[test]
    fn is_numeric_classification() {
        let numeric = [ColType::Int, ColType::Float, ColType::Bool];
        for &ty in &numeric {
            assert!(ty.is_numeric());
        }

        let non_numeric = [ColType::Str, ColType::Unknown, ColType::Mixed];
        for &ty in &non_numeric {
            assert!(!ty.is_numeric());
        }
    }

    /// Spot checks for widening, the identity element and conflicts.
    #[test]
    fn unify_widens_int_float() {
        let cases = [
            (ColType::Int, ColType::Float, ColType::Float),
            (ColType::Unknown, ColType::Str, ColType::Str),
            (ColType::Bool, ColType::Str, ColType::Mixed),
        ];
        for &(lhs, rhs, joined) in &cases {
            assert_eq!(lhs.unify(rhs), joined);
        }
    }

    /// `unify` must behave like a join: `a ∨ a = a` and `a ∨ b = b ∨ a`.
    #[test]
    fn unify_is_commutative_and_idempotent() {
        let every_type = [
            ColType::Unknown,
            ColType::Bool,
            ColType::Int,
            ColType::Float,
            ColType::Str,
            ColType::Mixed,
        ];
        for lhs in every_type.iter().copied() {
            assert_eq!(lhs.unify(lhs), lhs);
            for rhs in every_type.iter().copied() {
                assert_eq!(lhs.unify(rhs), rhs.unify(lhs));
            }
        }
    }
}