1use std::collections::{HashMap, HashSet};
22
23use redis::AsyncCommands;
24use redis::aio::ConnectionManager;
25use tokio::runtime::Runtime;
26
27use crate::connection::RedisConnection;
28use crate::error::{Error, Result};
29use crate::schema::RedisType;
30
31#[derive(Debug, Clone)]
33pub struct InferredSchema {
34 pub fields: Vec<(String, RedisType)>,
36 pub sample_count: usize,
38}
39
40#[derive(Debug, Clone)]
42pub struct FieldInferenceInfo {
43 pub inferred_type: RedisType,
45 pub confidence: f64,
48 pub samples: usize,
50 pub valid: usize,
52 pub nulls: usize,
54 pub type_candidates: HashMap<String, usize>,
56}
57
58impl FieldInferenceInfo {
59 pub fn is_confident(&self, threshold: f64) -> bool {
61 self.confidence >= threshold
62 }
63
64 pub fn null_ratio(&self) -> f64 {
66 if self.samples == 0 {
67 0.0
68 } else {
69 self.nulls as f64 / self.samples as f64
70 }
71 }
72}
73
74#[derive(Debug, Clone)]
76pub struct InferredSchemaWithConfidence {
77 pub fields: Vec<(String, RedisType)>,
79 pub sample_count: usize,
81 pub field_info: HashMap<String, FieldInferenceInfo>,
83}
84
85impl InferredSchemaWithConfidence {
86 pub fn to_basic(&self) -> InferredSchema {
88 InferredSchema {
89 fields: self.fields.clone(),
90 sample_count: self.sample_count,
91 }
92 }
93
94 pub fn low_confidence_fields(&self, threshold: f64) -> Vec<(&str, f64)> {
96 self.field_info
97 .iter()
98 .filter(|(_, info)| info.confidence < threshold)
99 .map(|(name, info)| (name.as_str(), info.confidence))
100 .collect()
101 }
102
103 pub fn all_confident(&self, threshold: f64) -> bool {
105 self.field_info
106 .values()
107 .all(|info| info.confidence >= threshold)
108 }
109
110 pub fn average_confidence(&self) -> f64 {
112 if self.field_info.is_empty() {
113 1.0
114 } else {
115 let sum: f64 = self.field_info.values().map(|info| info.confidence).sum();
116 sum / self.field_info.len() as f64
117 }
118 }
119}
120
121impl InferredSchema {
122 pub fn to_type_strings(&self) -> Vec<(String, String)> {
124 self.fields
125 .iter()
126 .map(|(name, dtype)| {
127 let type_str = match dtype {
128 RedisType::Utf8 => "utf8",
129 RedisType::Int64 => "int64",
130 RedisType::Float64 => "float64",
131 RedisType::Boolean => "bool",
132 RedisType::Date => "date",
133 RedisType::Datetime => "datetime",
134 };
135 (name.clone(), type_str.to_string())
136 })
137 .collect()
138 }
139
140 pub fn with_overwrite(self, overwrite: &[(String, RedisType)]) -> Self {
174 let overwrite_map: HashMap<&str, &RedisType> =
175 overwrite.iter().map(|(k, v)| (k.as_str(), v)).collect();
176
177 let existing_fields: HashSet<String> = self.fields.iter().map(|(k, _)| k.clone()).collect();
179
180 let mut fields: Vec<(String, RedisType)> = self
182 .fields
183 .into_iter()
184 .map(|(name, dtype)| {
185 if let Some(&override_type) = overwrite_map.get(name.as_str()) {
186 (name, *override_type)
187 } else {
188 (name, dtype)
189 }
190 })
191 .collect();
192
193 for (name, dtype) in overwrite {
195 if !existing_fields.contains(name) {
196 fields.push((name.clone(), *dtype));
197 }
198 }
199
200 fields.sort_by(|a, b| a.0.cmp(&b.0));
202
203 Self {
204 fields,
205 sample_count: self.sample_count,
206 }
207 }
208}
209
210pub fn infer_hash_schema(
221 url: &str,
222 pattern: &str,
223 sample_size: usize,
224 type_inference: bool,
225) -> Result<InferredSchema> {
226 let runtime =
227 Runtime::new().map_err(|e| Error::Runtime(format!("Failed to create runtime: {}", e)))?;
228
229 let connection = RedisConnection::new(url)?;
230 let mut conn = runtime.block_on(connection.get_connection_manager())?;
231
232 runtime.block_on(infer_hash_schema_async(
233 &mut conn,
234 pattern,
235 sample_size,
236 type_inference,
237 ))
238}
239
240pub fn infer_hash_schema_with_confidence(
278 url: &str,
279 pattern: &str,
280 sample_size: usize,
281) -> Result<InferredSchemaWithConfidence> {
282 let runtime =
283 Runtime::new().map_err(|e| Error::Runtime(format!("Failed to create runtime: {}", e)))?;
284
285 let connection = RedisConnection::new(url)?;
286 let mut conn = runtime.block_on(connection.get_connection_manager())?;
287
288 runtime.block_on(infer_hash_schema_with_confidence_async(
289 &mut conn,
290 pattern,
291 sample_size,
292 ))
293}
294
295async fn infer_hash_schema_with_confidence_async(
297 conn: &mut ConnectionManager,
298 pattern: &str,
299 sample_size: usize,
300) -> Result<InferredSchemaWithConfidence> {
301 let keys = scan_sample_keys(conn, pattern, sample_size).await?;
303
304 if keys.is_empty() {
305 return Ok(InferredSchemaWithConfidence {
306 fields: vec![],
307 sample_count: 0,
308 field_info: HashMap::new(),
309 });
310 }
311
312 let mut field_values: HashMap<String, Vec<Option<String>>> = HashMap::new();
314
315 for key in &keys {
316 let hash_data: HashMap<String, String> = conn.hgetall(key).await?;
317
318 let fields_in_hash: HashSet<&String> = hash_data.keys().collect();
320
321 for (field, value) in &hash_data {
323 field_values
324 .entry(field.clone())
325 .or_default()
326 .push(Some(value.clone()));
327 }
328
329 for (field, values) in &mut field_values {
331 if !fields_in_hash.contains(field) {
332 values.push(None);
333 }
334 }
335 }
336
337 let mut fields: Vec<(String, RedisType)> = Vec::new();
339 let mut field_info: HashMap<String, FieldInferenceInfo> = HashMap::new();
340
341 for (name, values) in field_values {
342 let (dtype, info) = infer_type_from_values_with_confidence(&values);
343 fields.push((name.clone(), dtype));
344 field_info.insert(name, info);
345 }
346
347 fields.sort_by(|a, b| a.0.cmp(&b.0));
349
350 Ok(InferredSchemaWithConfidence {
351 fields,
352 sample_count: keys.len(),
353 field_info,
354 })
355}
356
357async fn infer_hash_schema_async(
359 conn: &mut ConnectionManager,
360 pattern: &str,
361 sample_size: usize,
362 type_inference: bool,
363) -> Result<InferredSchema> {
364 let keys = scan_sample_keys(conn, pattern, sample_size).await?;
366
367 if keys.is_empty() {
368 return Ok(InferredSchema {
369 fields: vec![],
370 sample_count: 0,
371 });
372 }
373
374 let mut field_values: HashMap<String, Vec<Option<String>>> = HashMap::new();
376
377 for key in &keys {
378 let hash_data: HashMap<String, String> = conn.hgetall(key).await?;
379
380 let fields_in_hash: HashSet<&String> = hash_data.keys().collect();
382
383 for (field, value) in &hash_data {
385 field_values
386 .entry(field.clone())
387 .or_default()
388 .push(Some(value.clone()));
389 }
390
391 for (field, values) in &mut field_values {
393 if !fields_in_hash.contains(field) {
394 values.push(None);
395 }
396 }
397 }
398
399 let mut fields: Vec<(String, RedisType)> = field_values
401 .into_iter()
402 .map(|(name, values)| {
403 let dtype = if type_inference {
404 infer_type_from_values(&values)
405 } else {
406 RedisType::Utf8
407 };
408 (name, dtype)
409 })
410 .collect();
411
412 fields.sort_by(|a, b| a.0.cmp(&b.0));
414
415 Ok(InferredSchema {
416 fields,
417 sample_count: keys.len(),
418 })
419}
420
421pub fn infer_json_schema(url: &str, pattern: &str, sample_size: usize) -> Result<InferredSchema> {
431 let runtime =
432 Runtime::new().map_err(|e| Error::Runtime(format!("Failed to create runtime: {}", e)))?;
433
434 let connection = RedisConnection::new(url)?;
435 let mut conn = runtime.block_on(connection.get_connection_manager())?;
436
437 runtime.block_on(infer_json_schema_async(&mut conn, pattern, sample_size))
438}
439
440async fn infer_json_schema_async(
442 conn: &mut ConnectionManager,
443 pattern: &str,
444 sample_size: usize,
445) -> Result<InferredSchema> {
446 let keys = scan_sample_keys(conn, pattern, sample_size).await?;
448
449 if keys.is_empty() {
450 return Ok(InferredSchema {
451 fields: vec![],
452 sample_count: 0,
453 });
454 }
455
456 let mut field_values: HashMap<String, Vec<Option<serde_json::Value>>> = HashMap::new();
458
459 for key in &keys {
460 let json_str: Option<String> = redis::cmd("JSON.GET")
462 .arg(key)
463 .arg("$")
464 .query_async(conn)
465 .await?;
466
467 if let Some(json_str) = json_str {
468 if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&json_str) {
470 let doc = match parsed {
471 serde_json::Value::Array(mut arr) if !arr.is_empty() => arr.remove(0),
472 other => other,
473 };
474
475 if let serde_json::Value::Object(obj) = doc {
476 let fields_in_doc: HashSet<&String> = obj.keys().collect();
477
478 for (field, value) in &obj {
480 field_values
481 .entry(field.clone())
482 .or_default()
483 .push(Some(value.clone()));
484 }
485
486 for (field, values) in &mut field_values {
488 if !fields_in_doc.contains(field) {
489 values.push(None);
490 }
491 }
492 }
493 }
494 }
495 }
496
497 let mut fields: Vec<(String, RedisType)> = field_values
499 .into_iter()
500 .map(|(name, values)| {
501 let dtype = infer_type_from_json_values(&values);
502 (name, dtype)
503 })
504 .collect();
505
506 fields.sort_by(|a, b| a.0.cmp(&b.0));
508
509 Ok(InferredSchema {
510 fields,
511 sample_count: keys.len(),
512 })
513}
514
515async fn scan_sample_keys(
517 conn: &mut ConnectionManager,
518 pattern: &str,
519 max_keys: usize,
520) -> Result<Vec<String>> {
521 let mut keys = Vec::new();
522 let mut cursor: u64 = 0;
523
524 loop {
525 let (new_cursor, batch): (u64, Vec<String>) = redis::cmd("SCAN")
526 .arg(cursor)
527 .arg("MATCH")
528 .arg(pattern)
529 .arg("COUNT")
530 .arg(100)
531 .query_async(conn)
532 .await?;
533
534 keys.extend(batch);
535 cursor = new_cursor;
536
537 if cursor == 0 || keys.len() >= max_keys {
538 break;
539 }
540 }
541
542 keys.truncate(max_keys);
544 Ok(keys)
545}
546
547fn infer_type_from_values(values: &[Option<String>]) -> RedisType {
549 infer_type_from_values_with_confidence(values).0
550}
551
552fn infer_type_from_values_with_confidence(
556 values: &[Option<String>],
557) -> (RedisType, FieldInferenceInfo) {
558 let total_samples = values.len();
559 let null_count = values.iter().filter(|v| v.is_none()).count();
560 let non_null_values: Vec<&str> = values.iter().filter_map(|v| v.as_deref()).collect();
561
562 if non_null_values.is_empty() {
563 return (
564 RedisType::Utf8,
565 FieldInferenceInfo {
566 inferred_type: RedisType::Utf8,
567 confidence: 1.0, samples: total_samples,
569 valid: 0,
570 nulls: null_count,
571 type_candidates: HashMap::new(),
572 },
573 );
574 }
575
576 let mut type_candidates: HashMap<String, usize> = HashMap::new();
578
579 let int_count = non_null_values
580 .iter()
581 .filter(|v| v.parse::<i64>().is_ok())
582 .count();
583 let float_count = non_null_values
584 .iter()
585 .filter(|v| v.parse::<f64>().is_ok())
586 .count();
587 let bool_count = non_null_values
588 .iter()
589 .filter(|v| is_boolean_string(v.to_lowercase().as_str()))
590 .count();
591
592 type_candidates.insert("int64".to_string(), int_count);
593 type_candidates.insert("float64".to_string(), float_count);
594 type_candidates.insert("bool".to_string(), bool_count);
595 type_candidates.insert("utf8".to_string(), non_null_values.len()); let non_null_count = non_null_values.len();
598
599 let (inferred_type, valid_count) = if int_count == non_null_count {
601 (RedisType::Int64, int_count)
602 } else if float_count == non_null_count {
603 (RedisType::Float64, float_count)
604 } else if bool_count == non_null_count {
605 (RedisType::Boolean, bool_count)
606 } else {
607 let best_specific = [
609 (RedisType::Int64, int_count),
610 (RedisType::Float64, float_count),
611 (RedisType::Boolean, bool_count),
612 ]
613 .into_iter()
614 .max_by_key(|(_, count)| *count);
615
616 if let Some((best_type, best_count)) = best_specific {
617 if best_count > 0 && best_count as f64 / non_null_count as f64 >= 0.5 {
618 (best_type, best_count)
620 } else {
621 (RedisType::Utf8, non_null_count)
622 }
623 } else {
624 (RedisType::Utf8, non_null_count)
625 }
626 };
627
628 let confidence = if non_null_count == 0 {
630 1.0
631 } else {
632 valid_count as f64 / non_null_count as f64
633 };
634
635 (
636 inferred_type,
637 FieldInferenceInfo {
638 inferred_type,
639 confidence,
640 samples: total_samples,
641 valid: valid_count,
642 nulls: null_count,
643 type_candidates,
644 },
645 )
646}
647
648fn infer_type_from_json_values(values: &[Option<serde_json::Value>]) -> RedisType {
650 let non_null_values: Vec<&serde_json::Value> =
651 values.iter().filter_map(|v| v.as_ref()).collect();
652
653 if non_null_values.is_empty() {
654 return RedisType::Utf8;
655 }
656
657 let first_type = json_value_type(non_null_values[0]);
659
660 if non_null_values
661 .iter()
662 .all(|v| json_value_type(v) == first_type)
663 {
664 match first_type {
665 "boolean" => RedisType::Boolean,
666 "integer" => RedisType::Int64,
667 "number" => RedisType::Float64,
668 _ => RedisType::Utf8,
669 }
670 } else {
671 if non_null_values
673 .iter()
674 .all(|v| matches!(json_value_type(v), "integer" | "number"))
675 {
676 RedisType::Float64
677 } else {
678 RedisType::Utf8
679 }
680 }
681}
682
683fn json_value_type(value: &serde_json::Value) -> &'static str {
685 match value {
686 serde_json::Value::Null => "null",
687 serde_json::Value::Bool(_) => "boolean",
688 serde_json::Value::Number(n) => {
689 if n.is_i64() || n.is_u64() {
690 "integer"
691 } else {
692 "number"
693 }
694 }
695 serde_json::Value::String(_) => "string",
696 serde_json::Value::Array(_) => "array",
697 serde_json::Value::Object(_) => "object",
698 }
699}
700
/// Returns `true` when `s` is exactly one of the recognised boolean literals.
///
/// The comparison is case-sensitive; callers that want case-insensitive
/// matching must lowercase the input first.
fn is_boolean_string(s: &str) -> bool {
    const BOOLEAN_LITERALS: [&str; 10] = [
        "true", "false", "1", "0", "yes", "no", "t", "f", "y", "n",
    ];
    BOOLEAN_LITERALS.contains(&s)
}
708
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a string column without missing values.
    fn some_strings(items: &[&str]) -> Vec<Option<String>> {
        items.iter().map(|item| Some(item.to_string())).collect()
    }

    /// Builds an `InferredSchema` from borrowed field names.
    fn schema(fields: &[(&str, RedisType)], sample_count: usize) -> InferredSchema {
        InferredSchema {
            fields: owned_fields(fields),
            sample_count,
        }
    }

    /// Converts borrowed `(name, type)` pairs into owned ones.
    fn owned_fields(fields: &[(&str, RedisType)]) -> Vec<(String, RedisType)> {
        fields
            .iter()
            .map(|(name, dtype)| (name.to_string(), *dtype))
            .collect()
    }

    /// Looks up the type of `name`, panicking when the field is absent.
    fn type_of(schema: &InferredSchema, name: &str) -> RedisType {
        schema
            .fields
            .iter()
            .find(|(field, _)| field.as_str() == name)
            .map(|(_, dtype)| *dtype)
            .unwrap()
    }

    /// Builds a `FieldInferenceInfo` with an empty candidate map.
    fn make_info(
        inferred_type: RedisType,
        confidence: f64,
        samples: usize,
        valid: usize,
        nulls: usize,
    ) -> FieldInferenceInfo {
        FieldInferenceInfo {
            inferred_type,
            confidence,
            samples,
            valid,
            nulls,
            type_candidates: HashMap::new(),
        }
    }

    /// Builds a schema whose fields all have `sample_count` samples and no
    /// nulls. Entries are `(name, type, confidence, valid)`.
    fn confidence_schema(
        sample_count: usize,
        entries: &[(&str, RedisType, f64, usize)],
    ) -> InferredSchemaWithConfidence {
        let mut fields = Vec::new();
        let mut field_info = HashMap::new();
        for &(name, dtype, confidence, valid) in entries {
            fields.push((name.to_string(), dtype));
            field_info.insert(
                name.to_string(),
                make_info(dtype, confidence, sample_count, valid, 0),
            );
        }
        InferredSchemaWithConfidence {
            fields,
            sample_count,
            field_info,
        }
    }

    #[test]
    fn test_infer_type_int() {
        let values = some_strings(&["1", "42", "-10"]);
        assert!(matches!(infer_type_from_values(&values), RedisType::Int64));
    }

    #[test]
    fn test_infer_type_float() {
        let values = some_strings(&["1.5", "42.0", "-10.25"]);
        assert!(matches!(
            infer_type_from_values(&values),
            RedisType::Float64
        ));
    }

    #[test]
    fn test_infer_type_mixed_numeric() {
        // Integers mixed with floats widen to Float64.
        let values = some_strings(&["1", "42.5", "-10"]);
        assert!(matches!(
            infer_type_from_values(&values),
            RedisType::Float64
        ));
    }

    #[test]
    fn test_infer_type_bool() {
        let values = some_strings(&["true", "false", "True"]);
        assert!(matches!(
            infer_type_from_values(&values),
            RedisType::Boolean
        ));
    }

    #[test]
    fn test_infer_type_string() {
        let values = some_strings(&["hello", "world", "123abc"]);
        assert!(matches!(infer_type_from_values(&values), RedisType::Utf8));
    }

    #[test]
    fn test_infer_type_with_nulls() {
        let values = vec![Some("42".to_string()), None, Some("100".to_string())];
        assert!(matches!(infer_type_from_values(&values), RedisType::Int64));
    }

    #[test]
    fn test_infer_type_all_nulls() {
        let values: Vec<Option<String>> = vec![None, None, None];
        assert!(matches!(infer_type_from_values(&values), RedisType::Utf8));
    }

    #[test]
    fn test_infer_json_type_bool() {
        let values = vec![
            Some(serde_json::Value::Bool(true)),
            Some(serde_json::Value::Bool(false)),
        ];
        assert!(matches!(
            infer_type_from_json_values(&values),
            RedisType::Boolean
        ));
    }

    #[test]
    fn test_infer_json_type_int() {
        let values = vec![
            Some(serde_json::json!(42)),
            Some(serde_json::json!(-10)),
            Some(serde_json::json!(0)),
        ];
        assert!(matches!(
            infer_type_from_json_values(&values),
            RedisType::Int64
        ));
    }

    #[test]
    fn test_infer_json_type_float() {
        let values = vec![
            Some(serde_json::json!(42.5)),
            Some(serde_json::json!(-10.25)),
        ];
        assert!(matches!(
            infer_type_from_json_values(&values),
            RedisType::Float64
        ));
    }

    #[test]
    fn test_infer_json_type_string() {
        let values = vec![
            Some(serde_json::json!("hello")),
            Some(serde_json::json!("world")),
        ];
        assert!(matches!(
            infer_type_from_json_values(&values),
            RedisType::Utf8
        ));
    }

    #[test]
    fn test_schema_overwrite_basic() {
        let inferred = schema(
            &[
                ("age", RedisType::Utf8),
                ("name", RedisType::Utf8),
                ("score", RedisType::Float64),
            ],
            10,
        );

        let merged = inferred.with_overwrite(&owned_fields(&[("age", RedisType::Int64)]));

        assert_eq!(merged.fields.len(), 3);
        assert_eq!(merged.sample_count, 10);
        // The overridden field changes type, the others are untouched.
        assert!(matches!(type_of(&merged, "age"), RedisType::Int64));
        assert!(matches!(type_of(&merged, "name"), RedisType::Utf8));
    }

    #[test]
    fn test_schema_overwrite_adds_new_fields() {
        let inferred = schema(&[("name", RedisType::Utf8)], 5);

        let merged = inferred.with_overwrite(&owned_fields(&[("extra_field", RedisType::Int64)]));

        assert_eq!(merged.fields.len(), 2);
        assert!(matches!(type_of(&merged, "extra_field"), RedisType::Int64));
    }

    #[test]
    fn test_schema_overwrite_empty() {
        let inferred = schema(&[("a", RedisType::Utf8), ("b", RedisType::Int64)], 10);

        let overwrite: Vec<(String, RedisType)> = vec![];
        let merged = inferred.with_overwrite(&overwrite);

        assert_eq!(merged.fields.len(), 2);
    }

    #[test]
    fn test_schema_overwrite_multiple() {
        let inferred = schema(
            &[
                ("a", RedisType::Utf8),
                ("b", RedisType::Utf8),
                ("c", RedisType::Utf8),
            ],
            10,
        );

        let overwrite = owned_fields(&[
            ("a", RedisType::Int64),
            ("c", RedisType::Boolean),
            ("d", RedisType::Float64),
        ]);
        let merged = inferred.with_overwrite(&overwrite);

        assert_eq!(merged.fields.len(), 4);
        assert!(matches!(type_of(&merged, "a"), RedisType::Int64));
        assert!(matches!(type_of(&merged, "b"), RedisType::Utf8));
        assert!(matches!(type_of(&merged, "c"), RedisType::Boolean));
        assert!(matches!(type_of(&merged, "d"), RedisType::Float64));
    }

    /// Infers the type of a column holding a single string value.
    fn infer_single(s: &str) -> RedisType {
        infer_type_from_values(&[Some(s.to_string())])
    }

    /// Infers the type of a column holding a single JSON value.
    fn infer_single_json(v: &serde_json::Value) -> RedisType {
        infer_type_from_json_values(&[Some(v.clone())])
    }

    mod proptest_tests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            #[test]
            fn prop_infer_int64(value in any::<i64>()) {
                let result = infer_single(&value.to_string());
                prop_assert_eq!(result, RedisType::Int64);
            }

            #[test]
            fn prop_infer_float64(value in any::<f64>().prop_filter("Must be finite", |v| v.is_finite())) {
                // One forced decimal place keeps the text from parsing as i64.
                let s = format!("{:.1}", value);
                let result = infer_single(&s);
                prop_assert_eq!(result, RedisType::Float64);
            }

            #[test]
            fn prop_infer_utf8(s in "[a-zA-Z]{2}[a-zA-Z0-9]*") {
                let lower = s.to_lowercase();
                // Boolean literals are legitimately inferred as Boolean.
                prop_assume!(!matches!(lower.as_str(), "true" | "false" | "yes" | "no" | "t" | "f" | "y" | "n"));
                // `f64::from_str` accepts "inf", "infinity" and "nan" in any
                // letter case, and the generator can produce those words.
                prop_assume!(s.parse::<f64>().is_err());
                let result = infer_single(&s);
                prop_assert_eq!(result, RedisType::Utf8);
            }

            #[test]
            fn prop_infer_boolean(b in prop::bool::ANY) {
                let s = if b { "true" } else { "false" };
                let result = infer_single(s);
                prop_assert_eq!(result, RedisType::Boolean);
            }

            #[test]
            fn prop_overwrite_preserves_sample_count(count in 1usize..1000) {
                let inferred = InferredSchema {
                    fields: vec![("x".to_string(), RedisType::Utf8)],
                    sample_count: count,
                };
                let merged = inferred.with_overwrite(&[("x".to_string(), RedisType::Int64)]);
                prop_assert_eq!(merged.sample_count, count);
            }

            #[test]
            fn prop_overwrite_includes_originals(
                field_count in 1usize..20,
            ) {
                let fields: Vec<(String, RedisType)> = (0..field_count)
                    .map(|i| (format!("field_{}", i), RedisType::Utf8))
                    .collect();

                let inferred = InferredSchema {
                    fields: fields.clone(),
                    sample_count: 10,
                };

                let merged = inferred.with_overwrite(&[]);
                prop_assert_eq!(merged.fields.len(), field_count);
            }

            #[test]
            fn prop_overwrite_replaces_type(
                field_name in "[a-z]+",
            ) {
                let inferred = InferredSchema {
                    fields: vec![(field_name.clone(), RedisType::Utf8)],
                    sample_count: 5,
                };

                let merged = inferred.with_overwrite(&[(field_name.clone(), RedisType::Int64)]);

                let field = merged.fields.iter().find(|(n, _)| n == &field_name).unwrap();
                prop_assert!(matches!(field.1, RedisType::Int64));
            }
        }
    }

    #[test]
    fn test_infer_type_whitespace() {
        assert_eq!(infer_single(" "), RedisType::Utf8);
        assert_eq!(infer_single("\t"), RedisType::Utf8);
        assert_eq!(infer_single("\n"), RedisType::Utf8);
    }

    #[test]
    fn test_infer_type_special_numbers() {
        // Prefixed hexadecimal, octal and binary literals are not parsed as
        // numbers.
        assert_eq!(infer_single("0xFF"), RedisType::Utf8);
        assert_eq!(infer_single("0o777"), RedisType::Utf8);
        assert_eq!(infer_single("0b1010"), RedisType::Utf8);
    }

    #[test]
    fn test_infer_type_numeric_edge_cases() {
        // Leading zeros and an explicit plus sign still parse as integers.
        assert_eq!(infer_single("007"), RedisType::Int64);
        assert_eq!(infer_single("+42"), RedisType::Int64);

        // Scientific notation parses as a float.
        assert_eq!(infer_single("1e10"), RedisType::Float64);
        assert_eq!(infer_single("1E10"), RedisType::Float64);
        assert_eq!(infer_single("1.5e-3"), RedisType::Float64);
    }

    #[test]
    fn test_infer_type_float_keywords() {
        // Pins the behaviour that `prop_infer_utf8` has to exclude: these
        // words are valid `f64` literals.
        assert_eq!(infer_single("nan"), RedisType::Float64);
        assert_eq!(infer_single("inf"), RedisType::Float64);
        assert_eq!(infer_single("Infinity"), RedisType::Float64);
    }

    #[test]
    fn test_infer_type_boolean_variations() {
        assert_eq!(infer_single("true"), RedisType::Boolean);
        assert_eq!(infer_single("false"), RedisType::Boolean);
        assert_eq!(infer_single("yes"), RedisType::Boolean);
        assert_eq!(infer_single("no"), RedisType::Boolean);

        // Near misses stay strings.
        assert_eq!(infer_single("yep"), RedisType::Utf8);
        assert_eq!(infer_single("nope"), RedisType::Utf8);
    }

    #[test]
    fn test_infer_json_type_nested() {
        // Objects and arrays have no scalar type and fall back to Utf8.
        let nested = serde_json::json!({"inner": {"deep": 123}});
        assert_eq!(infer_single_json(&nested), RedisType::Utf8);

        let arr = serde_json::json!([1, 2, 3]);
        assert_eq!(infer_single_json(&arr), RedisType::Utf8);
    }

    #[test]
    fn test_schema_overwrite_case_sensitive() {
        let inferred = schema(&[("Name", RedisType::Utf8)], 5);

        let merged = inferred.with_overwrite(&owned_fields(&[("name", RedisType::Int64)]));
        // "name" does not match "Name", so it is added as a new field.
        assert_eq!(merged.fields.len(), 2);
    }

    #[test]
    fn test_confidence_all_integers() {
        let values = some_strings(&["1", "42", "-10"]);
        let (dtype, info) = infer_type_from_values_with_confidence(&values);

        assert_eq!(dtype, RedisType::Int64);
        assert_eq!(info.confidence, 1.0);
        assert_eq!(info.samples, 3);
        assert_eq!(info.valid, 3);
        assert_eq!(info.nulls, 0);
        assert_eq!(info.type_candidates.get("int64"), Some(&3));
    }

    #[test]
    fn test_confidence_with_nulls() {
        let values = vec![Some("42".to_string()), None, Some("100".to_string()), None];
        let (dtype, info) = infer_type_from_values_with_confidence(&values);

        assert_eq!(dtype, RedisType::Int64);
        // Nulls do not lower the confidence.
        assert_eq!(info.confidence, 1.0);
        assert_eq!(info.samples, 4);
        assert_eq!(info.valid, 2);
        assert_eq!(info.nulls, 2);
        assert!((info.null_ratio() - 0.5).abs() < 0.001);
    }

    #[test]
    fn test_confidence_mixed_types_low_confidence() {
        // Three of five values are numeric.
        let values = some_strings(&["1", "2", "3", "hello", "world"]);
        let (dtype, info) = infer_type_from_values_with_confidence(&values);

        // Int64 and Float64 both match three values; the tie goes to Float64.
        assert_eq!(dtype, RedisType::Float64);
        assert!((info.confidence - 0.6).abs() < 0.001);
        assert!(!info.is_confident(0.9));
        assert!(info.is_confident(0.5));
    }

    #[test]
    fn test_confidence_all_nulls() {
        let values: Vec<Option<String>> = vec![None, None, None];
        let (dtype, info) = infer_type_from_values_with_confidence(&values);

        assert_eq!(dtype, RedisType::Utf8);
        assert_eq!(info.confidence, 1.0);
        assert_eq!(info.samples, 3);
        assert_eq!(info.valid, 0);
        assert_eq!(info.nulls, 3);
    }

    #[test]
    fn test_confidence_empty() {
        let values: Vec<Option<String>> = vec![];
        let (dtype, info) = infer_type_from_values_with_confidence(&values);

        assert_eq!(dtype, RedisType::Utf8);
        assert_eq!(info.confidence, 1.0);
        assert_eq!(info.samples, 0);
    }

    #[test]
    fn test_field_inference_info_is_confident() {
        let info = make_info(RedisType::Int64, 0.85, 100, 85, 0);

        assert!(info.is_confident(0.8));
        assert!(!info.is_confident(0.9));
    }

    #[test]
    fn test_field_inference_info_null_ratio() {
        let info = make_info(RedisType::Int64, 1.0, 100, 75, 25);

        assert!((info.null_ratio() - 0.25).abs() < 0.001);
    }

    #[test]
    fn test_inferred_schema_with_confidence_to_basic() {
        let schema = confidence_schema(10, &[("age", RedisType::Int64, 1.0, 10)]);

        let basic = schema.to_basic();
        assert_eq!(basic.fields.len(), 1);
        assert_eq!(basic.sample_count, 10);
    }

    #[test]
    fn test_inferred_schema_with_confidence_low_confidence_fields() {
        let schema = confidence_schema(
            100,
            &[
                ("bad", RedisType::Float64, 0.6, 60),
                ("good", RedisType::Int64, 0.95, 95),
            ],
        );

        let low = schema.low_confidence_fields(0.9);
        assert_eq!(low.len(), 1);
        assert_eq!(low[0].0, "bad");
        assert!((low[0].1 - 0.6).abs() < 0.001);
    }

    #[test]
    fn test_inferred_schema_with_confidence_all_confident() {
        let schema = confidence_schema(
            100,
            &[
                ("a", RedisType::Int64, 0.95, 95),
                ("b", RedisType::Utf8, 1.0, 100),
            ],
        );

        assert!(schema.all_confident(0.9));
        assert!(!schema.all_confident(0.99));
    }

    #[test]
    fn test_inferred_schema_with_confidence_average() {
        let schema = confidence_schema(
            100,
            &[
                ("a", RedisType::Int64, 1.0, 100),
                ("b", RedisType::Float64, 0.8, 80),
            ],
        );

        assert!((schema.average_confidence() - 0.9).abs() < 0.001);
    }

    #[test]
    fn test_confidence_type_candidates() {
        let values = some_strings(&["1", "2", "3.5"]);
        let (_, info) = infer_type_from_values_with_confidence(&values);

        // Every value parses as a float, only two as integers.
        assert_eq!(info.type_candidates.get("float64"), Some(&3));
        assert_eq!(info.type_candidates.get("int64"), Some(&2));
        assert_eq!(info.type_candidates.get("utf8"), Some(&3));
    }
}