1use lnmp_core::LnmpValue;
10use lnmp_sfe::SemanticDictionary;
11
/// Case transformation applied to string values during normalization.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StringCaseRule {
    /// Convert strings to lowercase.
    Lower,
    /// Convert strings to uppercase.
    Upper,
    /// Leave strings exactly as they are (the default).
    #[default]
    None,
}
23
24#[derive(Debug, Clone)]
28pub struct NormalizationConfig {
29 pub string_case: StringCaseRule,
31 pub float_precision: Option<usize>,
33 pub remove_trailing_zeros: bool,
35 pub semantic_dictionary: Option<SemanticDictionary>,
37}
38
39impl Default for NormalizationConfig {
40 fn default() -> Self {
41 Self {
42 string_case: StringCaseRule::None,
43 float_precision: None,
44 remove_trailing_zeros: true,
45 semantic_dictionary: None,
46 }
47 }
48}
49
50#[derive(Debug)]
52pub struct ValueNormalizer {
53 config: NormalizationConfig,
54}
55
56impl ValueNormalizer {
57 pub fn new(config: NormalizationConfig) -> Self {
59 Self { config }
60 }
61
62 pub fn normalize(&self, value: &LnmpValue) -> LnmpValue {
64 self.normalize_with_fid(None, value)
65 }
66
67 pub fn normalize_with_fid(&self, fid: Option<u16>, value: &LnmpValue) -> LnmpValue {
69 match value {
70 LnmpValue::Int(i) => LnmpValue::Int(*i),
71 LnmpValue::Float(f) => LnmpValue::Float(self.normalize_float(*f)),
72 LnmpValue::Bool(b) => LnmpValue::Bool(*b),
73 LnmpValue::String(s) => LnmpValue::String(self.normalize_string_for(fid, s)),
74 LnmpValue::StringArray(arr) => LnmpValue::StringArray(
75 arr.iter()
76 .map(|s| self.normalize_string_for(fid, s))
77 .collect(),
78 ),
79 LnmpValue::NestedRecord(record) => LnmpValue::NestedRecord(record.clone()),
80 LnmpValue::NestedArray(records) => LnmpValue::NestedArray(records.clone()),
81 LnmpValue::Embedding(vec) => LnmpValue::Embedding(vec.clone()),
82 LnmpValue::EmbeddingDelta(delta) => LnmpValue::EmbeddingDelta(delta.clone()),
83 LnmpValue::QuantizedEmbedding(qv) => LnmpValue::QuantizedEmbedding(qv.clone()),
84 LnmpValue::IntArray(arr) => LnmpValue::IntArray(arr.clone()),
85 LnmpValue::FloatArray(arr) => {
86 let normalized_arr = arr.iter().map(|f| self.normalize_float(*f)).collect();
88 LnmpValue::FloatArray(normalized_arr)
89 }
90 LnmpValue::BoolArray(arr) => LnmpValue::BoolArray(arr.clone()),
91 }
92 }
93
94 pub fn normalize_bool(&self, value: &str) -> Option<bool> {
100 match value.to_lowercase().as_str() {
101 "true" | "yes" | "1" => Some(true),
102 "false" | "no" | "0" => Some(false),
103 _ => None,
104 }
105 }
106
107 fn normalize_float(&self, f: f64) -> f64 {
113 let mut normalized = if f == 0.0 { 0.0 } else { f };
115
116 if let Some(precision) = self.config.float_precision {
118 let multiplier = 10_f64.powi(precision as i32);
119 normalized = (normalized * multiplier).round() / multiplier;
120 }
121
122 normalized
123 }
124
125 fn normalize_string_for(&self, fid: Option<u16>, s: &str) -> String {
129 if let (Some(dict), Some(fid)) = (&self.config.semantic_dictionary, fid) {
130 if let Some(eq) = dict.get_equivalence(fid, s) {
131 return eq.to_string();
132 }
133 if let Some(eq) = dict.get_equivalence_normalized(fid, s) {
134 return eq.to_string();
135 }
136 }
137
138 match self.config.string_case {
139 StringCaseRule::Lower => s.to_lowercase(),
140 StringCaseRule::Upper => s.to_uppercase(),
141 StringCaseRule::None => s.to_string(),
142 }
143 }
144
145 pub fn format_float(&self, f: f64) -> String {
147 if !self.config.remove_trailing_zeros {
148 return f.to_string();
149 }
150
151 let s = f.to_string();
152
153 if !s.contains('.') {
155 return s;
156 }
157
158 let trimmed = s.trim_end_matches('0').trim_end_matches('.');
160 trimmed.to_string()
161 }
162}
163
164impl Default for ValueNormalizer {
165 fn default() -> Self {
166 Self::new(NormalizationConfig::default())
167 }
168}
169
#[cfg(test)]
mod tests {
    // Literals such as `3.14` are plain test data here, not approximations of π.
    #![allow(clippy::approx_constant)]

    use super::*;

    /// Builds a normalizer that differs from the default only in its case rule.
    fn normalizer_with_case(rule: StringCaseRule) -> ValueNormalizer {
        ValueNormalizer::new(NormalizationConfig {
            string_case: rule,
            ..NormalizationConfig::default()
        })
    }

    /// Builds a record holding the single field `1 => Int(42)`.
    fn sample_record() -> lnmp_core::LnmpRecord {
        use lnmp_core::{LnmpField, LnmpRecord};

        let mut record = LnmpRecord::new();
        record.add_field(LnmpField {
            fid: 1,
            value: LnmpValue::Int(42),
        });
        record
    }

    #[test]
    fn test_default_config() {
        let NormalizationConfig {
            string_case,
            float_precision,
            remove_trailing_zeros,
            ..
        } = NormalizationConfig::default();

        assert_eq!(string_case, StringCaseRule::None);
        assert_eq!(float_precision, None);
        assert!(remove_trailing_zeros);
    }

    #[test]
    fn test_normalize_int() {
        let normalized = ValueNormalizer::default().normalize(&LnmpValue::Int(42));
        assert_eq!(normalized, LnmpValue::Int(42));
    }

    #[test]
    fn test_normalize_bool() {
        let normalized = ValueNormalizer::default().normalize(&LnmpValue::Bool(true));
        assert_eq!(normalized, LnmpValue::Bool(true));
    }

    #[test]
    fn test_normalize_bool_from_string() {
        let normalizer = ValueNormalizer::default();

        for truthy in ["true", "True", "TRUE", "yes", "Yes", "1"] {
            assert_eq!(normalizer.normalize_bool(truthy), Some(true));
        }

        for falsy in ["false", "False", "FALSE", "no", "No", "0"] {
            assert_eq!(normalizer.normalize_bool(falsy), Some(false));
        }

        for unrecognized in ["invalid", ""] {
            assert_eq!(normalizer.normalize_bool(unrecognized), None);
        }
    }

    #[test]
    fn test_normalize_float_negative_zero() {
        let normalized = ValueNormalizer::default().normalize(&LnmpValue::Float(-0.0));
        assert_eq!(normalized, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_positive_zero() {
        let normalized = ValueNormalizer::default().normalize(&LnmpValue::Float(0.0));
        assert_eq!(normalized, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_regular() {
        let normalized = ValueNormalizer::default().normalize(&LnmpValue::Float(3.14));
        assert_eq!(normalized, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_normalize_float_with_precision() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            float_precision: Some(2),
            ..NormalizationConfig::default()
        });

        let normalized = normalizer.normalize(&LnmpValue::Float(3.14159));
        assert_eq!(normalized, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_format_float_remove_trailing_zeros() {
        let normalizer = ValueNormalizer::default();

        let cases = [
            (3.140, "3.14"),
            (3.100, "3.1"),
            (3.000, "3"),
            (3.14, "3.14"),
            (0.0, "0"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalizer.format_float(input), expected);
        }
    }

    #[test]
    fn test_format_float_keep_trailing_zeros() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            remove_trailing_zeros: false,
            ..NormalizationConfig::default()
        });

        assert!(normalizer.format_float(3.14).starts_with("3.14"));
    }

    #[test]
    fn test_normalize_string_no_case() {
        let input = LnmpValue::String("Test".to_string());
        let normalized = ValueNormalizer::default().normalize(&input);
        assert_eq!(normalized, LnmpValue::String("Test".to_string()));
    }

    #[test]
    fn test_normalize_string_lowercase() {
        let normalizer = normalizer_with_case(StringCaseRule::Lower);

        let normalized = normalizer.normalize(&LnmpValue::String("TeSt".to_string()));
        assert_eq!(normalized, LnmpValue::String("test".to_string()));
    }

    #[test]
    fn test_normalize_string_uppercase() {
        let normalizer = normalizer_with_case(StringCaseRule::Upper);

        let normalized = normalizer.normalize(&LnmpValue::String("TeSt".to_string()));
        assert_eq!(normalized, LnmpValue::String("TEST".to_string()));
    }

    #[test]
    fn test_normalize_string_array() {
        let normalizer = normalizer_with_case(StringCaseRule::Lower);

        let to_strings = |items: [&str; 3]| -> Vec<String> {
            items.iter().map(|item| item.to_string()).collect()
        };

        let input = LnmpValue::StringArray(to_strings(["Admin", "Developer", "USER"]));
        let expected = LnmpValue::StringArray(to_strings(["admin", "developer", "user"]));

        assert_eq!(normalizer.normalize(&input), expected);
    }

    #[test]
    fn test_normalize_nested_record() {
        let normalizer = ValueNormalizer::default();

        let input = LnmpValue::NestedRecord(Box::new(sample_record()));
        let normalized = normalizer.normalize(&input);

        // Nested records come back as an unchanged copy.
        assert_eq!(
            normalized,
            LnmpValue::NestedRecord(Box::new(sample_record()))
        );
    }

    #[test]
    fn test_normalize_nested_array() {
        let normalizer = ValueNormalizer::default();

        let input = LnmpValue::NestedArray(vec![sample_record()]);
        let normalized = normalizer.normalize(&input);

        // Nested arrays come back as an unchanged copy.
        assert_eq!(normalized, LnmpValue::NestedArray(vec![sample_record()]));
    }

    #[test]
    fn test_string_case_rule_default() {
        assert_eq!(StringCaseRule::default(), StringCaseRule::None);
    }
}