1use lnmp_core::LnmpValue;
10use lnmp_sfe::SemanticDictionary;
11
/// Case transformation applied to string values during normalization.
///
/// The default is [`StringCaseRule::None`], which leaves strings untouched.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum StringCaseRule {
    /// Convert strings to lowercase.
    Lower,
    /// Convert strings to uppercase.
    Upper,
    /// Keep strings exactly as they were supplied.
    #[default]
    None,
}
23
24#[derive(Debug, Clone)]
28pub struct NormalizationConfig {
29 pub string_case: StringCaseRule,
31 pub float_precision: Option<usize>,
33 pub remove_trailing_zeros: bool,
35 pub semantic_dictionary: Option<SemanticDictionary>,
37}
38
39impl Default for NormalizationConfig {
40 fn default() -> Self {
41 Self {
42 string_case: StringCaseRule::None,
43 float_precision: None,
44 remove_trailing_zeros: true,
45 semantic_dictionary: None,
46 }
47 }
48}
49
50#[derive(Debug)]
52pub struct ValueNormalizer {
53 config: NormalizationConfig,
54}
55
56impl ValueNormalizer {
57 pub fn new(config: NormalizationConfig) -> Self {
59 Self { config }
60 }
61
62 pub fn normalize(&self, value: &LnmpValue) -> LnmpValue {
64 self.normalize_with_fid(None, value)
65 }
66
67 pub fn normalize_with_fid(&self, fid: Option<u16>, value: &LnmpValue) -> LnmpValue {
69 match value {
70 LnmpValue::Int(i) => LnmpValue::Int(*i),
71 LnmpValue::Float(f) => LnmpValue::Float(self.normalize_float(*f)),
72 LnmpValue::Bool(b) => LnmpValue::Bool(*b),
73 LnmpValue::String(s) => LnmpValue::String(self.normalize_string_for(fid, s)),
74 LnmpValue::StringArray(arr) => LnmpValue::StringArray(
75 arr.iter()
76 .map(|s| self.normalize_string_for(fid, s))
77 .collect(),
78 ),
79 LnmpValue::NestedRecord(record) => LnmpValue::NestedRecord(record.clone()),
80 LnmpValue::NestedArray(records) => LnmpValue::NestedArray(records.clone()),
81 LnmpValue::Embedding(vec) => LnmpValue::Embedding(vec.clone()),
82 LnmpValue::EmbeddingDelta(delta) => LnmpValue::EmbeddingDelta(delta.clone()),
83 }
84 }
85
86 pub fn normalize_bool(&self, value: &str) -> Option<bool> {
92 match value.to_lowercase().as_str() {
93 "true" | "yes" | "1" => Some(true),
94 "false" | "no" | "0" => Some(false),
95 _ => None,
96 }
97 }
98
99 fn normalize_float(&self, f: f64) -> f64 {
105 let mut normalized = if f == 0.0 { 0.0 } else { f };
107
108 if let Some(precision) = self.config.float_precision {
110 let multiplier = 10_f64.powi(precision as i32);
111 normalized = (normalized * multiplier).round() / multiplier;
112 }
113
114 normalized
115 }
116
117 fn normalize_string_for(&self, fid: Option<u16>, s: &str) -> String {
121 if let (Some(dict), Some(fid)) = (&self.config.semantic_dictionary, fid) {
122 if let Some(eq) = dict.get_equivalence(fid, s) {
123 return eq.to_string();
124 }
125 if let Some(eq) = dict.get_equivalence_normalized(fid, s) {
126 return eq.to_string();
127 }
128 }
129
130 match self.config.string_case {
131 StringCaseRule::Lower => s.to_lowercase(),
132 StringCaseRule::Upper => s.to_uppercase(),
133 StringCaseRule::None => s.to_string(),
134 }
135 }
136
137 pub fn format_float(&self, f: f64) -> String {
139 if !self.config.remove_trailing_zeros {
140 return f.to_string();
141 }
142
143 let s = f.to_string();
144
145 if !s.contains('.') {
147 return s;
148 }
149
150 let trimmed = s.trim_end_matches('0').trim_end_matches('.');
152 trimmed.to_string()
153 }
154}
155
156impl Default for ValueNormalizer {
157 fn default() -> Self {
158 Self::new(NormalizationConfig::default())
159 }
160}
161
#[cfg(test)]
mod tests {
    // Literals such as 3.14 are arbitrary sample data, not approximations of PI.
    #![allow(clippy::approx_constant)]

    use super::*;

    /// Builds a normalizer that differs from the defaults only in its string case rule.
    fn normalizer_with_case(string_case: StringCaseRule) -> ValueNormalizer {
        ValueNormalizer::new(NormalizationConfig {
            string_case,
            ..NormalizationConfig::default()
        })
    }

    /// Builds a record holding a single integer field (fid 1, value 42).
    fn sample_record() -> lnmp_core::LnmpRecord {
        let mut record = lnmp_core::LnmpRecord::new();
        record.add_field(lnmp_core::LnmpField {
            fid: 1,
            value: LnmpValue::Int(42),
        });
        record
    }

    #[test]
    fn test_default_config() {
        let NormalizationConfig {
            string_case,
            float_precision,
            remove_trailing_zeros,
            ..
        } = NormalizationConfig::default();

        assert_eq!(string_case, StringCaseRule::None);
        assert_eq!(float_precision, None);
        assert!(remove_trailing_zeros);
    }

    #[test]
    fn test_normalize_int() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Int(42));
        assert_eq!(result, LnmpValue::Int(42));
    }

    #[test]
    fn test_normalize_bool() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Bool(true));
        assert_eq!(result, LnmpValue::Bool(true));
    }

    #[test]
    fn test_normalize_bool_from_string() {
        let normalizer = ValueNormalizer::default();

        for truthy in ["true", "True", "TRUE", "yes", "Yes", "1"] {
            assert_eq!(normalizer.normalize_bool(truthy), Some(true));
        }

        for falsy in ["false", "False", "FALSE", "no", "No", "0"] {
            assert_eq!(normalizer.normalize_bool(falsy), Some(false));
        }

        for unknown in ["invalid", ""] {
            assert_eq!(normalizer.normalize_bool(unknown), None);
        }
    }

    #[test]
    fn test_normalize_float_negative_zero() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(-0.0));
        assert_eq!(result, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_positive_zero() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(0.0));
        assert_eq!(result, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_regular() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(3.14));
        assert_eq!(result, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_normalize_float_with_precision() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            float_precision: Some(2),
            ..NormalizationConfig::default()
        });

        let result = normalizer.normalize(&LnmpValue::Float(3.14159));
        assert_eq!(result, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_format_float_remove_trailing_zeros() {
        let normalizer = ValueNormalizer::default();
        let cases = [
            (3.140, "3.14"),
            (3.100, "3.1"),
            (3.000, "3"),
            (3.14, "3.14"),
            (0.0, "0"),
        ];

        for (input, expected) in cases {
            assert_eq!(normalizer.format_float(input), expected);
        }
    }

    #[test]
    fn test_format_float_keep_trailing_zeros() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            remove_trailing_zeros: false,
            ..NormalizationConfig::default()
        });

        assert!(normalizer.format_float(3.14).starts_with("3.14"));
    }

    #[test]
    fn test_normalize_string_no_case() {
        let input = LnmpValue::String("Test".to_string());
        let result = ValueNormalizer::default().normalize(&input);
        assert_eq!(result, LnmpValue::String("Test".to_string()));
    }

    #[test]
    fn test_normalize_string_lowercase() {
        let input = LnmpValue::String("TeSt".to_string());
        let result = normalizer_with_case(StringCaseRule::Lower).normalize(&input);
        assert_eq!(result, LnmpValue::String("test".to_string()));
    }

    #[test]
    fn test_normalize_string_uppercase() {
        let input = LnmpValue::String("TeSt".to_string());
        let result = normalizer_with_case(StringCaseRule::Upper).normalize(&input);
        assert_eq!(result, LnmpValue::String("TEST".to_string()));
    }

    #[test]
    fn test_normalize_string_array() {
        let to_strings = |items: [&str; 3]| -> Vec<String> {
            items.iter().map(|item| item.to_string()).collect()
        };

        let input = LnmpValue::StringArray(to_strings(["Admin", "Developer", "USER"]));
        let result = normalizer_with_case(StringCaseRule::Lower).normalize(&input);

        assert_eq!(
            result,
            LnmpValue::StringArray(to_strings(["admin", "developer", "user"]))
        );
    }

    #[test]
    fn test_normalize_nested_record() {
        let record = sample_record();

        let input = LnmpValue::NestedRecord(Box::new(record.clone()));
        let result = ValueNormalizer::default().normalize(&input);

        // Nested records are expected to come back unchanged.
        assert_eq!(result, LnmpValue::NestedRecord(Box::new(record)));
    }

    #[test]
    fn test_normalize_nested_array() {
        let record = sample_record();

        let input = LnmpValue::NestedArray(vec![record.clone()]);
        let result = ValueNormalizer::default().normalize(&input);

        // Nested arrays are expected to come back unchanged.
        assert_eq!(result, LnmpValue::NestedArray(vec![record]));
    }

    #[test]
    fn test_string_case_rule_default() {
        assert_eq!(StringCaseRule::default(), StringCaseRule::None);
    }
}