1use lnmp_core::LnmpValue;
10use lnmp_sfe::SemanticDictionary;
11
/// Case transformation applied to string values during normalization.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum StringCaseRule {
    /// Convert strings to lowercase.
    Lower,
    /// Convert strings to uppercase.
    Upper,
    /// Leave strings exactly as they are (the default).
    #[default]
    None,
}
23
24#[derive(Debug, Clone)]
28pub struct NormalizationConfig {
29 pub string_case: StringCaseRule,
31 pub float_precision: Option<usize>,
33 pub remove_trailing_zeros: bool,
35 pub semantic_dictionary: Option<SemanticDictionary>,
37}
38
39impl Default for NormalizationConfig {
40 fn default() -> Self {
41 Self {
42 string_case: StringCaseRule::None,
43 float_precision: None,
44 remove_trailing_zeros: true,
45 semantic_dictionary: None,
46 }
47 }
48}
49
50#[derive(Debug)]
52pub struct ValueNormalizer {
53 config: NormalizationConfig,
54}
55
56impl ValueNormalizer {
57 pub fn new(config: NormalizationConfig) -> Self {
59 Self { config }
60 }
61
62 pub fn normalize(&self, value: &LnmpValue) -> LnmpValue {
64 self.normalize_with_fid(None, value)
65 }
66
67 pub fn normalize_with_fid(&self, fid: Option<u16>, value: &LnmpValue) -> LnmpValue {
69 match value {
70 LnmpValue::Int(i) => LnmpValue::Int(*i),
71 LnmpValue::Float(f) => LnmpValue::Float(self.normalize_float(*f)),
72 LnmpValue::Bool(b) => LnmpValue::Bool(*b),
73 LnmpValue::String(s) => LnmpValue::String(self.normalize_string_for(fid, s)),
74 LnmpValue::StringArray(arr) => LnmpValue::StringArray(
75 arr.iter()
76 .map(|s| self.normalize_string_for(fid, s))
77 .collect(),
78 ),
79 LnmpValue::NestedRecord(record) => LnmpValue::NestedRecord(record.clone()),
80 LnmpValue::NestedArray(records) => LnmpValue::NestedArray(records.clone()),
81 LnmpValue::Embedding(vec) => LnmpValue::Embedding(vec.clone()),
82 LnmpValue::EmbeddingDelta(delta) => LnmpValue::EmbeddingDelta(delta.clone()),
83 LnmpValue::QuantizedEmbedding(qv) => LnmpValue::QuantizedEmbedding(qv.clone()),
84 }
85 }
86
87 pub fn normalize_bool(&self, value: &str) -> Option<bool> {
93 match value.to_lowercase().as_str() {
94 "true" | "yes" | "1" => Some(true),
95 "false" | "no" | "0" => Some(false),
96 _ => None,
97 }
98 }
99
100 fn normalize_float(&self, f: f64) -> f64 {
106 let mut normalized = if f == 0.0 { 0.0 } else { f };
108
109 if let Some(precision) = self.config.float_precision {
111 let multiplier = 10_f64.powi(precision as i32);
112 normalized = (normalized * multiplier).round() / multiplier;
113 }
114
115 normalized
116 }
117
118 fn normalize_string_for(&self, fid: Option<u16>, s: &str) -> String {
122 if let (Some(dict), Some(fid)) = (&self.config.semantic_dictionary, fid) {
123 if let Some(eq) = dict.get_equivalence(fid, s) {
124 return eq.to_string();
125 }
126 if let Some(eq) = dict.get_equivalence_normalized(fid, s) {
127 return eq.to_string();
128 }
129 }
130
131 match self.config.string_case {
132 StringCaseRule::Lower => s.to_lowercase(),
133 StringCaseRule::Upper => s.to_uppercase(),
134 StringCaseRule::None => s.to_string(),
135 }
136 }
137
138 pub fn format_float(&self, f: f64) -> String {
140 if !self.config.remove_trailing_zeros {
141 return f.to_string();
142 }
143
144 let s = f.to_string();
145
146 if !s.contains('.') {
148 return s;
149 }
150
151 let trimmed = s.trim_end_matches('0').trim_end_matches('.');
153 trimmed.to_string()
154 }
155}
156
157impl Default for ValueNormalizer {
158 fn default() -> Self {
159 Self::new(NormalizationConfig::default())
160 }
161}
162
#[cfg(test)]
mod tests {
    // Literals such as 3.14 are plain fixtures here, not approximations of PI.
    #![allow(clippy::approx_constant)]

    use super::*;

    /// Builds a normalizer whose only non-default setting is the string case rule.
    fn with_case(rule: StringCaseRule) -> ValueNormalizer {
        ValueNormalizer::new(NormalizationConfig {
            string_case: rule,
            ..NormalizationConfig::default()
        })
    }

    /// Builds a record holding the single field `1 = Int(42)`.
    fn sample_record() -> lnmp_core::LnmpRecord {
        use lnmp_core::{LnmpField, LnmpRecord};

        let mut record = LnmpRecord::new();
        record.add_field(LnmpField {
            fid: 1,
            value: LnmpValue::Int(42),
        });
        record
    }

    #[test]
    fn test_default_config() {
        let defaults = NormalizationConfig::default();
        assert_eq!(defaults.string_case, StringCaseRule::None);
        assert_eq!(defaults.float_precision, None);
        assert!(defaults.remove_trailing_zeros);
    }

    #[test]
    fn test_normalize_int() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Int(42));
        assert_eq!(result, LnmpValue::Int(42));
    }

    #[test]
    fn test_normalize_bool() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Bool(true));
        assert_eq!(result, LnmpValue::Bool(true));
    }

    #[test]
    fn test_normalize_bool_from_string() {
        let normalizer = ValueNormalizer::default();

        for &input in &["true", "True", "TRUE", "yes", "Yes", "1"] {
            assert_eq!(normalizer.normalize_bool(input), Some(true));
        }

        for &input in &["false", "False", "FALSE", "no", "No", "0"] {
            assert_eq!(normalizer.normalize_bool(input), Some(false));
        }

        for &input in &["invalid", ""] {
            assert_eq!(normalizer.normalize_bool(input), None);
        }
    }

    #[test]
    fn test_normalize_float_negative_zero() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(-0.0));
        assert_eq!(result, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_positive_zero() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(0.0));
        assert_eq!(result, LnmpValue::Float(0.0));
    }

    #[test]
    fn test_normalize_float_regular() {
        let result = ValueNormalizer::default().normalize(&LnmpValue::Float(3.14));
        assert_eq!(result, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_normalize_float_with_precision() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            float_precision: Some(2),
            ..NormalizationConfig::default()
        });

        let result = normalizer.normalize(&LnmpValue::Float(3.14159));
        assert_eq!(result, LnmpValue::Float(3.14));
    }

    #[test]
    fn test_format_float_remove_trailing_zeros() {
        let normalizer = ValueNormalizer::default();
        let cases = [
            (3.140, "3.14"),
            (3.100, "3.1"),
            (3.000, "3"),
            (3.14, "3.14"),
            (0.0, "0"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(normalizer.format_float(input), expected);
        }
    }

    #[test]
    fn test_format_float_keep_trailing_zeros() {
        let normalizer = ValueNormalizer::new(NormalizationConfig {
            remove_trailing_zeros: false,
            ..NormalizationConfig::default()
        });

        assert!(normalizer.format_float(3.14).starts_with("3.14"));
    }

    #[test]
    fn test_normalize_string_no_case() {
        let input = LnmpValue::String("Test".to_string());
        let result = ValueNormalizer::default().normalize(&input);
        assert_eq!(result, LnmpValue::String("Test".to_string()));
    }

    #[test]
    fn test_normalize_string_lowercase() {
        let input = LnmpValue::String("TeSt".to_string());
        let result = with_case(StringCaseRule::Lower).normalize(&input);
        assert_eq!(result, LnmpValue::String("test".to_string()));
    }

    #[test]
    fn test_normalize_string_uppercase() {
        let input = LnmpValue::String("TeSt".to_string());
        let result = with_case(StringCaseRule::Upper).normalize(&input);
        assert_eq!(result, LnmpValue::String("TEST".to_string()));
    }

    #[test]
    fn test_normalize_string_array() {
        let input = LnmpValue::StringArray(vec![
            "Admin".to_string(),
            "Developer".to_string(),
            "USER".to_string(),
        ]);
        let expected = LnmpValue::StringArray(vec![
            "admin".to_string(),
            "developer".to_string(),
            "user".to_string(),
        ]);

        assert_eq!(with_case(StringCaseRule::Lower).normalize(&input), expected);
    }

    #[test]
    fn test_normalize_nested_record() {
        let record = sample_record();
        let input = LnmpValue::NestedRecord(Box::new(record.clone()));

        let result = ValueNormalizer::default().normalize(&input);

        // Nested records are expected to come back unchanged.
        assert_eq!(result, LnmpValue::NestedRecord(Box::new(record)));
    }

    #[test]
    fn test_normalize_nested_array() {
        let record = sample_record();
        let input = LnmpValue::NestedArray(vec![record.clone()]);

        let result = ValueNormalizer::default().normalize(&input);

        // Nested arrays are expected to come back unchanged.
        assert_eq!(result, LnmpValue::NestedArray(vec![record]));
    }

    #[test]
    fn test_string_case_rule_default() {
        assert_eq!(StringCaseRule::default(), StringCaseRule::None);
    }
}