lnmp_codec/
normalizer.rs

//! Value normalization system for semantic equivalence.
//!
//! This module provides value normalization so that semantically equivalent values
//! produce identical checksums. Normalization rules include:
//! - Boolean: convert textual representations (true/false, yes/no, 1/0) to a canonical `bool`
//! - Float: convert -0.0 to 0.0 and optionally round to a configured precision;
//!   trailing zeros are removed when a float is formatted
//! - String: apply semantic-dictionary equivalences, then case transformation based on configuration
8
9use lnmp_core::LnmpValue;
10use lnmp_sfe::SemanticDictionary;
11
/// Case-folding policy applied to string values during normalization.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StringCaseRule {
    /// Fold the string to lowercase.
    Lower,
    /// Fold the string to uppercase.
    Upper,
    /// Leave the string's case untouched; this is the default rule.
    #[default]
    None,
}
23
24// Default implementation derived via #[derive(Default)]
25
26/// Configuration for value normalization
27#[derive(Debug, Clone)]
28pub struct NormalizationConfig {
29    /// String case transformation rule
30    pub string_case: StringCaseRule,
31    /// Optional decimal precision for floats
32    pub float_precision: Option<usize>,
33    /// Whether to remove trailing zeros from floats
34    pub remove_trailing_zeros: bool,
35    /// Optional semantic dictionary for equivalence normalization
36    pub semantic_dictionary: Option<SemanticDictionary>,
37}
38
39impl Default for NormalizationConfig {
40    fn default() -> Self {
41        Self {
42            string_case: StringCaseRule::None,
43            float_precision: None,
44            remove_trailing_zeros: true,
45            semantic_dictionary: None,
46        }
47    }
48}
49
50/// Value normalizer for semantic equivalence
51#[derive(Debug)]
52pub struct ValueNormalizer {
53    config: NormalizationConfig,
54}
55
56impl ValueNormalizer {
57    /// Creates a new normalizer with the given configuration
58    pub fn new(config: NormalizationConfig) -> Self {
59        Self { config }
60    }
61
62    /// Normalizes a value to its canonical form (no field context).
63    pub fn normalize(&self, value: &LnmpValue) -> LnmpValue {
64        self.normalize_with_fid(None, value)
65    }
66
67    /// Normalizes a value with field context for dictionary-based mapping.
68    pub fn normalize_with_fid(&self, fid: Option<u16>, value: &LnmpValue) -> LnmpValue {
69        match value {
70            LnmpValue::Int(i) => LnmpValue::Int(*i),
71            LnmpValue::Float(f) => LnmpValue::Float(self.normalize_float(*f)),
72            LnmpValue::Bool(b) => LnmpValue::Bool(*b),
73            LnmpValue::String(s) => LnmpValue::String(self.normalize_string_for(fid, s)),
74            LnmpValue::StringArray(arr) => LnmpValue::StringArray(
75                arr.iter()
76                    .map(|s| self.normalize_string_for(fid, s))
77                    .collect(),
78            ),
79            LnmpValue::NestedRecord(record) => LnmpValue::NestedRecord(record.clone()),
80            LnmpValue::NestedArray(records) => LnmpValue::NestedArray(records.clone()),
81            LnmpValue::Embedding(vec) => LnmpValue::Embedding(vec.clone()),
82            LnmpValue::EmbeddingDelta(delta) => LnmpValue::EmbeddingDelta(delta.clone()),
83        }
84    }
85
86    /// Normalizes boolean representations to canonical form
87    ///
88    /// Converts common boolean representations:
89    /// - "true", "yes", "1" → true
90    /// - "false", "no", "0" → false
91    pub fn normalize_bool(&self, value: &str) -> Option<bool> {
92        match value.to_lowercase().as_str() {
93            "true" | "yes" | "1" => Some(true),
94            "false" | "no" | "0" => Some(false),
95            _ => None,
96        }
97    }
98
99    /// Normalizes float representations
100    ///
101    /// - Converts -0.0 to 0.0
102    /// - Removes trailing zeros after decimal point (if configured)
103    /// - Applies precision rounding (if configured)
104    fn normalize_float(&self, f: f64) -> f64 {
105        // Convert -0.0 to 0.0
106        let mut normalized = if f == 0.0 { 0.0 } else { f };
107
108        // Apply precision if configured
109        if let Some(precision) = self.config.float_precision {
110            let multiplier = 10_f64.powi(precision as i32);
111            normalized = (normalized * multiplier).round() / multiplier;
112        }
113
114        normalized
115    }
116
117    /// Normalizes string representations
118    ///
119    /// Applies case transformation based on configuration
120    fn normalize_string_for(&self, fid: Option<u16>, s: &str) -> String {
121        if let (Some(dict), Some(fid)) = (&self.config.semantic_dictionary, fid) {
122            if let Some(eq) = dict.get_equivalence(fid, s) {
123                return eq.to_string();
124            }
125            if let Some(eq) = dict.get_equivalence_normalized(fid, s) {
126                return eq.to_string();
127            }
128        }
129
130        match self.config.string_case {
131            StringCaseRule::Lower => s.to_lowercase(),
132            StringCaseRule::Upper => s.to_uppercase(),
133            StringCaseRule::None => s.to_string(),
134        }
135    }
136
137    /// Formats a normalized float as a string with trailing zeros removed
138    pub fn format_float(&self, f: f64) -> String {
139        if !self.config.remove_trailing_zeros {
140            return f.to_string();
141        }
142
143        let s = f.to_string();
144
145        // If there's no decimal point, return as-is
146        if !s.contains('.') {
147            return s;
148        }
149
150        // Remove trailing zeros after decimal point
151        let trimmed = s.trim_end_matches('0').trim_end_matches('.');
152        trimmed.to_string()
153    }
154}
155
156impl Default for ValueNormalizer {
157    fn default() -> Self {
158        Self::new(NormalizationConfig::default())
159    }
160}
161
#[cfg(test)]
mod tests {
    // The `3.14` literals below are ordinary fixtures, not approximations of PI.
    #![allow(clippy::approx_constant)]

    use super::*;
    use lnmp_core::{LnmpField, LnmpRecord};

    /// Default configuration with only the string case rule overridden.
    fn normalizer_with_case(rule: StringCaseRule) -> ValueNormalizer {
        ValueNormalizer::new(NormalizationConfig {
            string_case: rule,
            ..NormalizationConfig::default()
        })
    }

    /// A record holding the single field `1 = Int(42)`.
    fn sample_record() -> LnmpRecord {
        let mut record = LnmpRecord::new();
        record.add_field(LnmpField {
            fid: 1,
            value: LnmpValue::Int(42),
        });
        record
    }

    #[test]
    fn test_default_config() {
        let NormalizationConfig {
            string_case,
            float_precision,
            remove_trailing_zeros,
            ..
        } = NormalizationConfig::default();

        assert_eq!(string_case, StringCaseRule::None);
        assert_eq!(float_precision, None);
        assert!(remove_trailing_zeros);
    }

    #[test]
    fn test_normalize_int() {
        let input = LnmpValue::Int(42);
        let output = ValueNormalizer::default().normalize(&input);
        assert_eq!(output, LnmpValue::Int(42));
    }

    #[test]
    fn test_normalize_bool() {
        let input = LnmpValue::Bool(true);
        let output = ValueNormalizer::default().normalize(&input);
        assert_eq!(output, LnmpValue::Bool(true));
    }

    #[test]
    fn test_normalize_bool_from_string() {
        let normalizer = ValueNormalizer::default();

        // Every accepted spelling of "true".
        for &truthy in &["true", "True", "TRUE", "yes", "Yes", "1"] {
            assert_eq!(normalizer.normalize_bool(truthy), Some(true));
        }

        // Every accepted spelling of "false".
        for &falsy in &["false", "False", "FALSE", "no", "No", "0"] {
            assert_eq!(normalizer.normalize_bool(falsy), Some(false));
        }

        // Anything else is not a boolean.
        for &unknown in &["invalid", ""] {
            assert_eq!(normalizer.normalize_bool(unknown), None);
        }
    }

    #[test]
    fn test_normalize_float_negative_zero() {
        let input = LnmpValue::Float(-0.0);
        let output = ValueNormalizer::default().normalize(&input);
        assert_eq!(output, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_positive_zero() {
        let input = LnmpValue::Float(0.0);
        let output = ValueNormalizer::default().normalize(&input);
        assert_eq!(output, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_regular() {
        let input = LnmpValue::Float(3.14);
        let output = ValueNormalizer::default().normalize(&input);
        assert_eq!(output, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_normalize_float_with_precision() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            float_precision: Some(2),
            ..NormalizationConfig::default()
        });

        let output = normalizer.normalize(&LnmpValue::Float(3.14159));
        assert_eq!(output, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_format_float_remove_trailing_zeros() {
        let normalizer = ValueNormalizer::default();

        let cases = [
            (3.140, "3.14"),
            (3.100, "3.1"),
            (3.000, "3"),
            (3.14, "3.14"),
            (0.0, "0"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(normalizer.format_float(input), expected);
        }
    }

    #[test]
    fn test_format_float_keep_trailing_zeros() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            remove_trailing_zeros: false,
            ..NormalizationConfig::default()
        });

        assert!(normalizer.format_float(3.14).starts_with("3.14"));
    }

    #[test]
    fn test_normalize_string_no_case() {
        let input = LnmpValue::String("Test".to_string());
        let output = ValueNormalizer::default().normalize(&input);
        assert_eq!(output, LnmpValue::String("Test".to_string()));
    }

    #[test]
    fn test_normalize_string_lowercase() {
        let normalizer = normalizer_with_case(StringCaseRule::Lower);

        let output = normalizer.normalize(&LnmpValue::String("TeSt".to_string()));
        assert_eq!(output, LnmpValue::String("test".to_string()));
    }

    #[test]
    fn test_normalize_string_uppercase() {
        let normalizer = normalizer_with_case(StringCaseRule::Upper);

        let output = normalizer.normalize(&LnmpValue::String("TeSt".to_string()));
        assert_eq!(output, LnmpValue::String("TEST".to_string()));
    }

    #[test]
    fn test_normalize_string_array() {
        let normalizer = normalizer_with_case(StringCaseRule::Lower);

        let to_array = |items: &[&str]| {
            LnmpValue::StringArray(items.iter().map(|item| item.to_string()).collect())
        };

        let output = normalizer.normalize(&to_array(&["Admin", "Developer", "USER"]));
        assert_eq!(output, to_array(&["admin", "developer", "user"]));
    }

    #[test]
    fn test_normalize_nested_record() {
        let record = sample_record();
        let input = LnmpValue::NestedRecord(Box::new(record.clone()));

        let output = ValueNormalizer::default().normalize(&input);

        // Nested records are not modified by normalization
        assert_eq!(output, LnmpValue::NestedRecord(Box::new(record)));
    }

    #[test]
    fn test_normalize_nested_array() {
        let record = sample_record();
        let input = LnmpValue::NestedArray(vec![record.clone()]);

        let output = ValueNormalizer::default().normalize(&input);

        // Nested arrays are not modified by normalization
        assert_eq!(output, LnmpValue::NestedArray(vec![record]));
    }

    #[test]
    fn test_string_case_rule_default() {
        assert_eq!(StringCaseRule::default(), StringCaseRule::None);
    }
}