lnmp_codec/
normalizer.rs

1//! Value normalization system for semantic equivalence.
2//!
3//! This module provides value normalization to ensure semantically equivalent values
4//! produce identical checksums. Normalization rules include:
5//! - Boolean: Convert all representations (true/false, yes/no, 1/0) to canonical form
6//! - Float: Convert -0.0 to 0.0, remove trailing zeros
7//! - String: Apply case transformation based on configuration
8
9use lnmp_core::LnmpValue;
10use lnmp_sfe::SemanticDictionary;
11
/// Case transformation applied to string values during normalization.
///
/// The `Default` variant is [`StringCaseRule::None`], i.e. strings keep
/// their original case unless a rule is selected explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StringCaseRule {
    /// Convert the string to lowercase (via `str::to_lowercase`).
    Lower,
    /// Convert the string to uppercase (via `str::to_uppercase`).
    Upper,
    /// Leave the string's case untouched.
    #[default]
    None,
}
23
24// Default implementation derived via #[derive(Default)]
25
/// Configuration for value normalization.
///
/// Consumed by [`ValueNormalizer`]. The `Default` implementation applies no
/// case transformation, no float rounding, enables trailing-zero removal and
/// configures no semantic dictionary.
#[derive(Debug, Clone)]
pub struct NormalizationConfig {
    /// Case transformation applied to string values that the semantic
    /// dictionary (if any) did not map to an equivalence.
    pub string_case: StringCaseRule,
    /// Number of decimal places floats are rounded to during normalization;
    /// `None` disables rounding.
    pub float_precision: Option<usize>,
    /// Whether `ValueNormalizer::format_float` strips trailing zeros (and a
    /// dangling decimal point) from its textual output.
    pub remove_trailing_zeros: bool,
    /// Optional semantic dictionary used to map string values to their
    /// equivalence; it is only consulted when a field id is supplied.
    pub semantic_dictionary: Option<SemanticDictionary>,
}
38
39impl Default for NormalizationConfig {
40    fn default() -> Self {
41        Self {
42            string_case: StringCaseRule::None,
43            float_precision: None,
44            remove_trailing_zeros: true,
45            semantic_dictionary: None,
46        }
47    }
48}
49
/// Value normalizer for semantic equivalence.
///
/// Holds the [`NormalizationConfig`] that drives every normalization and
/// formatting method on this type.
#[derive(Debug)]
pub struct ValueNormalizer {
    // Rules applied by the normalizer; set once at construction.
    config: NormalizationConfig,
}
55
56impl ValueNormalizer {
57    /// Creates a new normalizer with the given configuration
58    pub fn new(config: NormalizationConfig) -> Self {
59        Self { config }
60    }
61
62    /// Normalizes a value to its canonical form (no field context).
63    pub fn normalize(&self, value: &LnmpValue) -> LnmpValue {
64        self.normalize_with_fid(None, value)
65    }
66
67    /// Normalizes a value with field context for dictionary-based mapping.
68    pub fn normalize_with_fid(&self, fid: Option<u16>, value: &LnmpValue) -> LnmpValue {
69        match value {
70            LnmpValue::Int(i) => LnmpValue::Int(*i),
71            LnmpValue::Float(f) => LnmpValue::Float(self.normalize_float(*f)),
72            LnmpValue::Bool(b) => LnmpValue::Bool(*b),
73            LnmpValue::String(s) => LnmpValue::String(self.normalize_string_for(fid, s)),
74            LnmpValue::StringArray(arr) => LnmpValue::StringArray(
75                arr.iter()
76                    .map(|s| self.normalize_string_for(fid, s))
77                    .collect(),
78            ),
79            LnmpValue::NestedRecord(record) => LnmpValue::NestedRecord(record.clone()),
80            LnmpValue::NestedArray(records) => LnmpValue::NestedArray(records.clone()),
81        }
82    }
83
84    /// Normalizes boolean representations to canonical form
85    ///
86    /// Converts common boolean representations:
87    /// - "true", "yes", "1" → true
88    /// - "false", "no", "0" → false
89    pub fn normalize_bool(&self, value: &str) -> Option<bool> {
90        match value.to_lowercase().as_str() {
91            "true" | "yes" | "1" => Some(true),
92            "false" | "no" | "0" => Some(false),
93            _ => None,
94        }
95    }
96
97    /// Normalizes float representations
98    ///
99    /// - Converts -0.0 to 0.0
100    /// - Removes trailing zeros after decimal point (if configured)
101    /// - Applies precision rounding (if configured)
102    fn normalize_float(&self, f: f64) -> f64 {
103        // Convert -0.0 to 0.0
104        let mut normalized = if f == 0.0 { 0.0 } else { f };
105
106        // Apply precision if configured
107        if let Some(precision) = self.config.float_precision {
108            let multiplier = 10_f64.powi(precision as i32);
109            normalized = (normalized * multiplier).round() / multiplier;
110        }
111
112        normalized
113    }
114
115    /// Normalizes string representations
116    ///
117    /// Applies case transformation based on configuration
118    fn normalize_string_for(&self, fid: Option<u16>, s: &str) -> String {
119        if let (Some(dict), Some(fid)) = (&self.config.semantic_dictionary, fid) {
120            if let Some(eq) = dict.get_equivalence(fid, s) {
121                return eq.to_string();
122            }
123            if let Some(eq) = dict.get_equivalence_normalized(fid, s) {
124                return eq.to_string();
125            }
126        }
127
128        match self.config.string_case {
129            StringCaseRule::Lower => s.to_lowercase(),
130            StringCaseRule::Upper => s.to_uppercase(),
131            StringCaseRule::None => s.to_string(),
132        }
133    }
134
135    /// Formats a normalized float as a string with trailing zeros removed
136    pub fn format_float(&self, f: f64) -> String {
137        if !self.config.remove_trailing_zeros {
138            return f.to_string();
139        }
140
141        let s = f.to_string();
142
143        // If there's no decimal point, return as-is
144        if !s.contains('.') {
145            return s;
146        }
147
148        // Remove trailing zeros after decimal point
149        let trimmed = s.trim_end_matches('0').trim_end_matches('.');
150        trimmed.to_string()
151    }
152}
153
154impl Default for ValueNormalizer {
155    fn default() -> Self {
156        Self::new(NormalizationConfig::default())
157    }
158}
159
#[cfg(test)]
mod tests {
    #![allow(clippy::approx_constant)]

    use super::*;
    use lnmp_core::{LnmpField, LnmpRecord};

    /// Normalizer whose only non-default setting is the string case rule.
    fn normalizer_with_case(rule: StringCaseRule) -> ValueNormalizer {
        ValueNormalizer::new(NormalizationConfig {
            string_case: rule,
            ..NormalizationConfig::default()
        })
    }

    /// Record holding a single integer field (fid 1 = 42).
    fn single_field_record() -> LnmpRecord {
        let mut record = LnmpRecord::new();
        record.add_field(LnmpField {
            fid: 1,
            value: LnmpValue::Int(42),
        });
        record
    }

    #[test]
    fn test_default_config() {
        let defaults = NormalizationConfig::default();
        assert_eq!(defaults.string_case, StringCaseRule::None);
        assert_eq!(defaults.float_precision, None);
        assert!(defaults.remove_trailing_zeros);
    }

    #[test]
    fn test_normalize_int() {
        let output = ValueNormalizer::default().normalize(&LnmpValue::Int(42));
        assert_eq!(output, LnmpValue::Int(42));
    }

    #[test]
    fn test_normalize_bool() {
        let output = ValueNormalizer::default().normalize(&LnmpValue::Bool(true));
        assert_eq!(output, LnmpValue::Bool(true));
    }

    #[test]
    fn test_normalize_bool_from_string() {
        let normalizer = ValueNormalizer::default();

        for input in ["true", "True", "TRUE", "yes", "Yes", "1"] {
            assert_eq!(normalizer.normalize_bool(input), Some(true), "input: {input:?}");
        }

        for input in ["false", "False", "FALSE", "no", "No", "0"] {
            assert_eq!(normalizer.normalize_bool(input), Some(false), "input: {input:?}");
        }

        for input in ["invalid", ""] {
            assert_eq!(normalizer.normalize_bool(input), None, "input: {input:?}");
        }
    }

    #[test]
    fn test_normalize_float_negative_zero() {
        let output = ValueNormalizer::default().normalize(&LnmpValue::Float(-0.0));
        assert_eq!(output, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_positive_zero() {
        let output = ValueNormalizer::default().normalize(&LnmpValue::Float(0.0));
        assert_eq!(output, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_regular() {
        let output = ValueNormalizer::default().normalize(&LnmpValue::Float(3.14));
        assert_eq!(output, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_normalize_float_with_precision() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            float_precision: Some(2),
            ..NormalizationConfig::default()
        });

        let output = normalizer.normalize(&LnmpValue::Float(3.14159));
        assert_eq!(output, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_format_float_remove_trailing_zeros() {
        let normalizer = ValueNormalizer::default();

        let cases = [
            (3.140, "3.14"),
            (3.100, "3.1"),
            (3.000, "3"),
            (3.14, "3.14"),
            (0.0, "0"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalizer.format_float(input), expected);
        }
    }

    #[test]
    fn test_format_float_keep_trailing_zeros() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            remove_trailing_zeros: false,
            ..NormalizationConfig::default()
        });

        assert!(normalizer.format_float(3.14).starts_with("3.14"));
    }

    #[test]
    fn test_normalize_string_no_case() {
        let input = LnmpValue::String("Test".to_string());
        let output = ValueNormalizer::default().normalize(&input);
        assert_eq!(output, LnmpValue::String("Test".to_string()));
    }

    #[test]
    fn test_normalize_string_lowercase() {
        let input = LnmpValue::String("TeSt".to_string());
        let output = normalizer_with_case(StringCaseRule::Lower).normalize(&input);
        assert_eq!(output, LnmpValue::String("test".to_string()));
    }

    #[test]
    fn test_normalize_string_uppercase() {
        let input = LnmpValue::String("TeSt".to_string());
        let output = normalizer_with_case(StringCaseRule::Upper).normalize(&input);
        assert_eq!(output, LnmpValue::String("TEST".to_string()));
    }

    #[test]
    fn test_normalize_string_array() {
        let to_strings = |items: [&str; 3]| -> Vec<String> {
            items.iter().map(|item| item.to_string()).collect()
        };

        let input = LnmpValue::StringArray(to_strings(["Admin", "Developer", "USER"]));
        let output = normalizer_with_case(StringCaseRule::Lower).normalize(&input);

        assert_eq!(
            output,
            LnmpValue::StringArray(to_strings(["admin", "developer", "user"]))
        );
    }

    #[test]
    fn test_normalize_nested_record() {
        let record = single_field_record();

        let input = LnmpValue::NestedRecord(Box::new(record.clone()));
        let output = ValueNormalizer::default().normalize(&input);

        // Nested records pass through normalization untouched.
        assert_eq!(output, LnmpValue::NestedRecord(Box::new(record)));
    }

    #[test]
    fn test_normalize_nested_array() {
        let record = single_field_record();

        let input = LnmpValue::NestedArray(vec![record.clone()]);
        let output = ValueNormalizer::default().normalize(&input);

        // Nested arrays pass through normalization untouched.
        assert_eq!(output, LnmpValue::NestedArray(vec![record]));
    }

    #[test]
    fn test_string_case_rule_default() {
        assert_eq!(StringCaseRule::default(), StringCaseRule::None);
    }
}