lnmp_codec/
normalizer.rs

1//! Value normalization system for semantic equivalence.
2//!
3//! This module provides value normalization to ensure semantically equivalent values
4//! produce identical checksums. Normalization rules include:
5//! - Boolean: Convert all representations (true/false, yes/no, 1/0) to canonical form
6//! - Float: Convert -0.0 to 0.0, remove trailing zeros
7//! - String: Apply case transformation based on configuration
8
9use lnmp_core::LnmpValue;
10use lnmp_sfe::SemanticDictionary;
11
/// Case-folding policy applied to string values during normalization.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum StringCaseRule {
    /// Fold the string to lowercase.
    Lower,
    /// Fold the string to uppercase.
    Upper,
    /// Keep the string's case exactly as given (this is the default rule).
    #[default]
    None,
}
23
24// Default implementation derived via #[derive(Default)]
25
26/// Configuration for value normalization
27#[derive(Debug, Clone)]
28pub struct NormalizationConfig {
29    /// String case transformation rule
30    pub string_case: StringCaseRule,
31    /// Optional decimal precision for floats
32    pub float_precision: Option<usize>,
33    /// Whether to remove trailing zeros from floats
34    pub remove_trailing_zeros: bool,
35    /// Optional semantic dictionary for equivalence normalization
36    pub semantic_dictionary: Option<SemanticDictionary>,
37}
38
39impl Default for NormalizationConfig {
40    fn default() -> Self {
41        Self {
42            string_case: StringCaseRule::None,
43            float_precision: None,
44            remove_trailing_zeros: true,
45            semantic_dictionary: None,
46        }
47    }
48}
49
50/// Value normalizer for semantic equivalence
51#[derive(Debug)]
52pub struct ValueNormalizer {
53    config: NormalizationConfig,
54}
55
56impl ValueNormalizer {
57    /// Creates a new normalizer with the given configuration
58    pub fn new(config: NormalizationConfig) -> Self {
59        Self { config }
60    }
61
62    /// Normalizes a value to its canonical form (no field context).
63    pub fn normalize(&self, value: &LnmpValue) -> LnmpValue {
64        self.normalize_with_fid(None, value)
65    }
66
67    /// Normalizes a value with field context for dictionary-based mapping.
68    pub fn normalize_with_fid(&self, fid: Option<u16>, value: &LnmpValue) -> LnmpValue {
69        match value {
70            LnmpValue::Int(i) => LnmpValue::Int(*i),
71            LnmpValue::Float(f) => LnmpValue::Float(self.normalize_float(*f)),
72            LnmpValue::Bool(b) => LnmpValue::Bool(*b),
73            LnmpValue::String(s) => LnmpValue::String(self.normalize_string_for(fid, s)),
74            LnmpValue::StringArray(arr) => LnmpValue::StringArray(
75                arr.iter()
76                    .map(|s| self.normalize_string_for(fid, s))
77                    .collect(),
78            ),
79            LnmpValue::NestedRecord(record) => LnmpValue::NestedRecord(record.clone()),
80            LnmpValue::NestedArray(records) => LnmpValue::NestedArray(records.clone()),
81            LnmpValue::Embedding(vec) => LnmpValue::Embedding(vec.clone()),
82            LnmpValue::EmbeddingDelta(delta) => LnmpValue::EmbeddingDelta(delta.clone()),
83            LnmpValue::QuantizedEmbedding(qv) => LnmpValue::QuantizedEmbedding(qv.clone()),
84        }
85    }
86
87    /// Normalizes boolean representations to canonical form
88    ///
89    /// Converts common boolean representations:
90    /// - "true", "yes", "1" → true
91    /// - "false", "no", "0" → false
92    pub fn normalize_bool(&self, value: &str) -> Option<bool> {
93        match value.to_lowercase().as_str() {
94            "true" | "yes" | "1" => Some(true),
95            "false" | "no" | "0" => Some(false),
96            _ => None,
97        }
98    }
99
100    /// Normalizes float representations
101    ///
102    /// - Converts -0.0 to 0.0
103    /// - Removes trailing zeros after decimal point (if configured)
104    /// - Applies precision rounding (if configured)
105    fn normalize_float(&self, f: f64) -> f64 {
106        // Convert -0.0 to 0.0
107        let mut normalized = if f == 0.0 { 0.0 } else { f };
108
109        // Apply precision if configured
110        if let Some(precision) = self.config.float_precision {
111            let multiplier = 10_f64.powi(precision as i32);
112            normalized = (normalized * multiplier).round() / multiplier;
113        }
114
115        normalized
116    }
117
118    /// Normalizes string representations
119    ///
120    /// Applies case transformation based on configuration
121    fn normalize_string_for(&self, fid: Option<u16>, s: &str) -> String {
122        if let (Some(dict), Some(fid)) = (&self.config.semantic_dictionary, fid) {
123            if let Some(eq) = dict.get_equivalence(fid, s) {
124                return eq.to_string();
125            }
126            if let Some(eq) = dict.get_equivalence_normalized(fid, s) {
127                return eq.to_string();
128            }
129        }
130
131        match self.config.string_case {
132            StringCaseRule::Lower => s.to_lowercase(),
133            StringCaseRule::Upper => s.to_uppercase(),
134            StringCaseRule::None => s.to_string(),
135        }
136    }
137
138    /// Formats a normalized float as a string with trailing zeros removed
139    pub fn format_float(&self, f: f64) -> String {
140        if !self.config.remove_trailing_zeros {
141            return f.to_string();
142        }
143
144        let s = f.to_string();
145
146        // If there's no decimal point, return as-is
147        if !s.contains('.') {
148            return s;
149        }
150
151        // Remove trailing zeros after decimal point
152        let trimmed = s.trim_end_matches('0').trim_end_matches('.');
153        trimmed.to_string()
154    }
155}
156
157impl Default for ValueNormalizer {
158    fn default() -> Self {
159        Self::new(NormalizationConfig::default())
160    }
161}
162
#[cfg(test)]
mod tests {
    #![allow(clippy::approx_constant)]

    use super::*;
    use lnmp_core::{LnmpField, LnmpRecord};

    /// Builds a normalizer from explicit settings, always without a semantic dictionary.
    fn normalizer_with(
        string_case: StringCaseRule,
        float_precision: Option<usize>,
        remove_trailing_zeros: bool,
    ) -> ValueNormalizer {
        ValueNormalizer::new(NormalizationConfig {
            string_case,
            float_precision,
            remove_trailing_zeros,
            semantic_dictionary: None,
        })
    }

    /// Builds a normalizer that only differs from the defaults in its string case rule.
    fn normalizer_with_case(string_case: StringCaseRule) -> ValueNormalizer {
        normalizer_with(string_case, None, true)
    }

    /// Builds a record holding the single field `1 => Int(42)`.
    fn single_field_record() -> LnmpRecord {
        let mut record = LnmpRecord::new();
        record.add_field(LnmpField {
            fid: 1,
            value: LnmpValue::Int(42),
        });
        record
    }

    #[test]
    fn test_default_config() {
        let defaults = NormalizationConfig::default();
        assert_eq!(defaults.string_case, StringCaseRule::None);
        assert_eq!(defaults.float_precision, None);
        assert!(defaults.remove_trailing_zeros);
    }

    #[test]
    fn test_normalize_int() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Int(42));
        assert_eq!(result, LnmpValue::Int(42));
    }

    #[test]
    fn test_normalize_bool() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Bool(true));
        assert_eq!(result, LnmpValue::Bool(true));
    }

    #[test]
    fn test_normalize_bool_from_string() {
        let normalizer = ValueNormalizer::default();

        for truthy in ["true", "True", "TRUE", "yes", "Yes", "1"] {
            assert_eq!(normalizer.normalize_bool(truthy), Some(true));
        }

        for falsy in ["false", "False", "FALSE", "no", "No", "0"] {
            assert_eq!(normalizer.normalize_bool(falsy), Some(false));
        }

        for unrecognized in ["invalid", ""] {
            assert_eq!(normalizer.normalize_bool(unrecognized), None);
        }
    }

    #[test]
    fn test_normalize_float_negative_zero() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(-0.0));
        assert_eq!(result, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_positive_zero() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(0.0));
        assert_eq!(result, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_regular() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(3.14));
        assert_eq!(result, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_normalize_float_with_precision() {
        let normalizer = normalizer_with(StringCaseRule::None, Some(2), true);

        let result = normalizer.normalize(&LnmpValue::Float(3.14159));
        assert_eq!(result, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_format_float_remove_trailing_zeros() {
        let normalizer = ValueNormalizer::default();

        let cases = [
            (3.140, "3.14"),
            (3.100, "3.1"),
            (3.000, "3"),
            (3.14, "3.14"),
            (0.0, "0"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalizer.format_float(input), expected);
        }
    }

    #[test]
    fn test_format_float_keep_trailing_zeros() {
        let normalizer = normalizer_with(StringCaseRule::None, None, false);

        assert!(normalizer.format_float(3.14).starts_with("3.14"));
    }

    #[test]
    fn test_normalize_string_no_case() {
        let input = LnmpValue::String("Test".to_string());
        let result = ValueNormalizer::default().normalize(&input);
        assert_eq!(result, LnmpValue::String("Test".to_string()));
    }

    #[test]
    fn test_normalize_string_lowercase() {
        let normalizer = normalizer_with_case(StringCaseRule::Lower);

        let result = normalizer.normalize(&LnmpValue::String("TeSt".to_string()));
        assert_eq!(result, LnmpValue::String("test".to_string()));
    }

    #[test]
    fn test_normalize_string_uppercase() {
        let normalizer = normalizer_with_case(StringCaseRule::Upper);

        let result = normalizer.normalize(&LnmpValue::String("TeSt".to_string()));
        assert_eq!(result, LnmpValue::String("TEST".to_string()));
    }

    #[test]
    fn test_normalize_string_array() {
        let normalizer = normalizer_with_case(StringCaseRule::Lower);

        let input = LnmpValue::StringArray(
            ["Admin", "Developer", "USER"]
                .iter()
                .map(|s| s.to_string())
                .collect(),
        );
        let expected = LnmpValue::StringArray(
            ["admin", "developer", "user"]
                .iter()
                .map(|s| s.to_string())
                .collect(),
        );

        assert_eq!(normalizer.normalize(&input), expected);
    }

    #[test]
    fn test_normalize_nested_record() {
        let record = single_field_record();
        let input = LnmpValue::NestedRecord(Box::new(record.clone()));

        let result = ValueNormalizer::default().normalize(&input);

        // Nested records pass through normalization untouched
        assert_eq!(result, LnmpValue::NestedRecord(Box::new(record)));
    }

    #[test]
    fn test_normalize_nested_array() {
        let record = single_field_record();
        let input = LnmpValue::NestedArray(vec![record.clone()]);

        let result = ValueNormalizer::default().normalize(&input);

        // Nested arrays pass through normalization untouched
        assert_eq!(result, LnmpValue::NestedArray(vec![record]));
    }

    #[test]
    fn test_string_case_rule_default() {
        assert_eq!(StringCaseRule::default(), StringCaseRule::None);
    }
}