bq_schema_gen/validate/
mod.rs

1//! Schema validation module for validating data against BigQuery schemas.
2//!
3//! This module provides functionality to validate JSON/CSV data against
4//! an existing BigQuery schema, checking for:
5//! - Required fields presence
6//! - Type compatibility
7//! - Unknown fields
8
9pub mod error;
10
11pub use error::{ValidationError, ValidationErrorType, ValidationResult};
12
13use crate::inference::{
14    is_boolean_string, is_date, is_float_string, is_integer_string, is_time, is_timestamp,
15};
16use crate::schema::types::BqSchemaField;
17use serde_json::Value;
18use std::collections::HashMap;
19
/// Configuration options for validation.
///
/// Usually built from `ValidationOptions::default()` with individual fields
/// overridden through struct-update syntax.
#[derive(Debug, Clone)]
pub struct ValidationOptions {
    /// Don't fail on fields not in schema (convert to warnings).
    /// When `false`, such fields are recorded as errors.
    pub allow_unknown: bool,
    /// Strict type checking. When `true`, string-encoded values are not
    /// accepted for INTEGER, FLOAT or BOOLEAN fields, and JSON numbers are
    /// not accepted for TIMESTAMP fields.
    pub strict_types: bool,
    /// Maximum number of errors before stopping
    pub max_errors: usize,
}
30
31impl Default for ValidationOptions {
32    fn default() -> Self {
33        Self {
34            allow_unknown: false,
35            strict_types: false,
36            max_errors: 100,
37        }
38    }
39}
40
/// Validator for checking data against a BigQuery schema.
///
/// The schema is borrowed for `'a`; the validator owns its options and a
/// name index whose values point into the borrowed schema.
pub struct SchemaValidator<'a> {
    /// Top-level schema fields as supplied to `new`.
    schema: &'a [BqSchemaField],
    /// Settings for unknown-field handling, type strictness and the error limit.
    options: ValidationOptions,
    /// Schema fields indexed by lowercase name for quick lookup
    schema_map: HashMap<String, &'a BqSchemaField>,
}
48
49impl<'a> SchemaValidator<'a> {
50    /// Create a new validator with the given schema and options.
51    pub fn new(schema: &'a [BqSchemaField], options: ValidationOptions) -> Self {
52        let schema_map = schema.iter().map(|f| (f.name.to_lowercase(), f)).collect();
53
54        Self {
55            schema,
56            options,
57            schema_map,
58        }
59    }
60
61    /// Validate a single record against the schema.
62    ///
63    /// Returns validation errors found in this record.
64    pub fn validate_record(
65        &self,
66        record: &Value,
67        line: usize,
68        result: &mut ValidationResult,
69    ) -> bool {
70        if result.reached_max_errors(self.options.max_errors) {
71            return false;
72        }
73
74        match record {
75            Value::Object(obj) => {
76                self.validate_object(obj, line, "", self.schema, &self.schema_map, result);
77            }
78            _ => {
79                result.add_error(ValidationError {
80                    line,
81                    path: String::new(),
82                    error_type: ValidationErrorType::TypeMismatch {
83                        expected: "RECORD".to_string(),
84                        actual: json_type_name(record).to_string(),
85                    },
86                    message: format!("Expected object/record, got {}", json_type_name(record)),
87                });
88            }
89        }
90
91        !result.reached_max_errors(self.options.max_errors)
92    }
93
94    /// Validate an object against schema fields.
95    fn validate_object(
96        &self,
97        obj: &serde_json::Map<String, Value>,
98        line: usize,
99        prefix: &str,
100        schema_fields: &[BqSchemaField],
101        field_map: &HashMap<String, &BqSchemaField>,
102        result: &mut ValidationResult,
103    ) {
104        // Check for required fields
105        for field in schema_fields {
106            if field.mode == "REQUIRED" {
107                let key_lower = field.name.to_lowercase();
108                let found = obj.keys().any(|k| k.to_lowercase() == key_lower);
109
110                if !found {
111                    let path = make_path(prefix, &field.name);
112                    result.add_error(ValidationError::missing_required(line, &path));
113                    if result.reached_max_errors(self.options.max_errors) {
114                        return;
115                    }
116                } else {
117                    // Check if the value is null for a required field
118                    if let Some(value) = obj.iter().find(|(k, _)| k.to_lowercase() == key_lower) {
119                        if value.1.is_null() {
120                            let path = make_path(prefix, &field.name);
121                            result.add_error(ValidationError::missing_required(line, &path));
122                            if result.reached_max_errors(self.options.max_errors) {
123                                return;
124                            }
125                        }
126                    }
127                }
128            }
129        }
130
131        // Check each field in the data
132        for (key, value) in obj {
133            let path = make_path(prefix, key);
134            let key_lower = key.to_lowercase();
135
136            if result.reached_max_errors(self.options.max_errors) {
137                return;
138            }
139
140            match field_map.get(&key_lower) {
141                Some(field) => {
142                    // Validate the value against the field definition
143                    self.validate_value(value, field, line, &path, result);
144                }
145                None => {
146                    // Unknown field
147                    let error = ValidationError::unknown_field(line, &path);
148                    if self.options.allow_unknown {
149                        result.add_warning(error);
150                    } else {
151                        result.add_error(error);
152                    }
153                }
154            }
155        }
156    }
157
158    /// Validate a single value against a field definition.
159    fn validate_value(
160        &self,
161        value: &Value,
162        field: &BqSchemaField,
163        line: usize,
164        path: &str,
165        result: &mut ValidationResult,
166    ) {
167        if result.reached_max_errors(self.options.max_errors) {
168            return;
169        }
170
171        // Handle null values
172        if value.is_null() {
173            // Already checked for REQUIRED fields above
174            return;
175        }
176
177        // Handle REPEATED (array) fields
178        if field.mode == "REPEATED" {
179            match value {
180                Value::Array(arr) => {
181                    for (idx, item) in arr.iter().enumerate() {
182                        let item_path = format!("{}[{}]", path, idx);
183                        self.validate_single_value(item, field, line, &item_path, result);
184                        if result.reached_max_errors(self.options.max_errors) {
185                            return;
186                        }
187                    }
188                }
189                _ => {
190                    result.add_error(ValidationError::type_mismatch(
191                        line,
192                        path,
193                        "ARRAY",
194                        json_type_name(value),
195                        &truncate_value(value),
196                    ));
197                }
198            }
199        } else {
200            self.validate_single_value(value, field, line, path, result);
201        }
202    }
203
204    /// Validate a single (non-array) value against a field type.
205    fn validate_single_value(
206        &self,
207        value: &Value,
208        field: &BqSchemaField,
209        line: usize,
210        path: &str,
211        result: &mut ValidationResult,
212    ) {
213        if result.reached_max_errors(self.options.max_errors) {
214            return;
215        }
216
217        // Handle null in arrays (allowed)
218        if value.is_null() {
219            return;
220        }
221
222        let expected_type = &field.field_type;
223
224        match expected_type.as_str() {
225            "RECORD" => {
226                match value {
227                    Value::Object(obj) => {
228                        // Recursively validate nested fields
229                        if let Some(nested_fields) = &field.fields {
230                            let nested_map: HashMap<String, &BqSchemaField> = nested_fields
231                                .iter()
232                                .map(|f| (f.name.to_lowercase(), f))
233                                .collect();
234                            self.validate_object(
235                                obj,
236                                line,
237                                path,
238                                nested_fields,
239                                &nested_map,
240                                result,
241                            );
242                        }
243                    }
244                    _ => {
245                        result.add_error(ValidationError::type_mismatch(
246                            line,
247                            path,
248                            "RECORD",
249                            json_type_name(value),
250                            &truncate_value(value),
251                        ));
252                    }
253                }
254            }
255            "STRING" => {
256                // Most types can be coerced to string
257                if !matches!(value, Value::String(_) | Value::Number(_) | Value::Bool(_)) {
258                    result.add_error(ValidationError::type_mismatch(
259                        line,
260                        path,
261                        "STRING",
262                        json_type_name(value),
263                        &truncate_value(value),
264                    ));
265                }
266            }
267            "INTEGER" => {
268                if !self.is_valid_integer(value) {
269                    result.add_error(ValidationError::type_mismatch(
270                        line,
271                        path,
272                        "INTEGER",
273                        json_type_name(value),
274                        &truncate_value(value),
275                    ));
276                }
277            }
278            "FLOAT" => {
279                if !self.is_valid_float(value) {
280                    result.add_error(ValidationError::type_mismatch(
281                        line,
282                        path,
283                        "FLOAT",
284                        json_type_name(value),
285                        &truncate_value(value),
286                    ));
287                }
288            }
289            "BOOLEAN" => {
290                if !self.is_valid_boolean(value) {
291                    result.add_error(ValidationError::type_mismatch(
292                        line,
293                        path,
294                        "BOOLEAN",
295                        json_type_name(value),
296                        &truncate_value(value),
297                    ));
298                }
299            }
300            "TIMESTAMP" => {
301                if !self.is_valid_timestamp(value) {
302                    result.add_error(ValidationError::type_mismatch(
303                        line,
304                        path,
305                        "TIMESTAMP",
306                        json_type_name(value),
307                        &truncate_value(value),
308                    ));
309                }
310            }
311            "DATE" => {
312                if !self.is_valid_date(value) {
313                    result.add_error(ValidationError::type_mismatch(
314                        line,
315                        path,
316                        "DATE",
317                        json_type_name(value),
318                        &truncate_value(value),
319                    ));
320                }
321            }
322            "TIME" => {
323                if !self.is_valid_time(value) {
324                    result.add_error(ValidationError::type_mismatch(
325                        line,
326                        path,
327                        "TIME",
328                        json_type_name(value),
329                        &truncate_value(value),
330                    ));
331                }
332            }
333            _ => {
334                // Unknown type - skip validation
335            }
336        }
337    }
338
339    /// Check if a value is valid for INTEGER type.
340    fn is_valid_integer(&self, value: &Value) -> bool {
341        match value {
342            Value::Number(n) => n.is_i64() || n.is_u64(),
343            Value::String(s) if !self.options.strict_types => is_integer_string(s),
344            _ => false,
345        }
346    }
347
348    /// Check if a value is valid for FLOAT type.
349    fn is_valid_float(&self, value: &Value) -> bool {
350        match value {
351            Value::Number(_) => true,
352            Value::String(s) if !self.options.strict_types => {
353                is_float_string(s) || is_integer_string(s)
354            }
355            _ => false,
356        }
357    }
358
359    /// Check if a value is valid for BOOLEAN type.
360    fn is_valid_boolean(&self, value: &Value) -> bool {
361        match value {
362            Value::Bool(_) => true,
363            Value::String(s) if !self.options.strict_types => is_boolean_string(s),
364            _ => false,
365        }
366    }
367
368    /// Check if a value is valid for TIMESTAMP type.
369    fn is_valid_timestamp(&self, value: &Value) -> bool {
370        match value {
371            Value::String(s) => is_timestamp(s),
372            Value::Number(_) if !self.options.strict_types => true, // Unix timestamp
373            _ => false,
374        }
375    }
376
377    /// Check if a value is valid for DATE type.
378    fn is_valid_date(&self, value: &Value) -> bool {
379        match value {
380            Value::String(s) => is_date(s),
381            _ => false,
382        }
383    }
384
385    /// Check if a value is valid for TIME type.
386    fn is_valid_time(&self, value: &Value) -> bool {
387        match value {
388            Value::String(s) => is_time(s),
389            _ => false,
390        }
391    }
392}
393
/// Join a parent path and a field name into a dotted path.
///
/// With an empty `prefix` the result is just `name`; otherwise it is
/// `prefix`, a `.`, then `name`.
fn make_path(prefix: &str, name: &str) -> String {
    if prefix.is_empty() {
        return name.to_owned();
    }

    let mut path = String::with_capacity(prefix.len() + 1 + name.len());
    path.push_str(prefix);
    path.push('.');
    path.push_str(name);
    path
}
402
403/// Get a human-readable type name for a JSON value.
404fn json_type_name(value: &Value) -> &'static str {
405    match value {
406        Value::Null => "NULL",
407        Value::Bool(_) => "BOOLEAN",
408        Value::Number(n) => {
409            if n.is_i64() || n.is_u64() {
410                "INTEGER"
411            } else {
412                "FLOAT"
413            }
414        }
415        Value::String(_) => "STRING",
416        Value::Array(_) => "ARRAY",
417        Value::Object(_) => "RECORD",
418    }
419}
420
421/// Truncate a value for display in error messages.
422fn truncate_value(value: &Value) -> String {
423    let s = match value {
424        Value::String(s) => s.clone(),
425        _ => value.to_string(),
426    };
427    if s.len() > 50 {
428        format!("{}...", &s[..47])
429    } else {
430        s
431    }
432}
433
434/// Validate data from a JSON iterator against a schema.
435///
436/// This function processes records line-by-line for memory efficiency.
437pub fn validate_json_data<R: std::io::BufRead>(
438    reader: R,
439    schema: &[BqSchemaField],
440    options: ValidationOptions,
441) -> crate::Result<ValidationResult> {
442    use crate::input::JsonRecordIterator;
443
444    let validator = SchemaValidator::new(schema, options.clone());
445    let mut result = ValidationResult::new();
446
447    let iter = JsonRecordIterator::new(reader, true); // ignore_invalid_lines=true to collect all errors
448
449    for record_result in iter {
450        match record_result {
451            Ok((line, record)) => {
452                if !validator.validate_record(&record, line, &mut result) {
453                    break; // Max errors reached
454                }
455            }
456            Err(e) => {
457                // Parse error
458                result.add_error(ValidationError {
459                    line: 0,
460                    path: String::new(),
461                    error_type: ValidationErrorType::TypeMismatch {
462                        expected: "valid JSON".to_string(),
463                        actual: "parse error".to_string(),
464                    },
465                    message: format!("JSON parse error: {}", e),
466                });
467                if result.reached_max_errors(options.max_errors) {
468                    break;
469                }
470            }
471        }
472    }
473
474    Ok(result)
475}
476
477#[cfg(test)]
478mod tests {
479    use super::*;
480    use serde_json::json;
481
482    fn make_field(name: &str, field_type: &str, mode: &str) -> BqSchemaField {
483        BqSchemaField {
484            name: name.to_string(),
485            field_type: field_type.to_string(),
486            mode: mode.to_string(),
487            fields: None,
488        }
489    }
490
491    fn make_record_field(name: &str, mode: &str, fields: Vec<BqSchemaField>) -> BqSchemaField {
492        BqSchemaField {
493            name: name.to_string(),
494            field_type: "RECORD".to_string(),
495            mode: mode.to_string(),
496            fields: Some(fields),
497        }
498    }
499
500    #[test]
501    fn test_valid_simple_record() {
502        let schema = vec![
503            make_field("name", "STRING", "NULLABLE"),
504            make_field("age", "INTEGER", "NULLABLE"),
505        ];
506
507        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
508        let mut result = ValidationResult::new();
509
510        let record = json!({"name": "John", "age": 30});
511        validator.validate_record(&record, 1, &mut result);
512
513        assert!(result.valid);
514        assert_eq!(result.error_count, 0);
515    }
516
517    #[test]
518    fn test_missing_required_field() {
519        let schema = vec![
520            make_field("name", "STRING", "REQUIRED"),
521            make_field("age", "INTEGER", "NULLABLE"),
522        ];
523
524        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
525        let mut result = ValidationResult::new();
526
527        let record = json!({"age": 30});
528        validator.validate_record(&record, 42, &mut result);
529
530        assert!(!result.valid);
531        assert_eq!(result.error_count, 1);
532        assert_eq!(result.errors[0].line, 42);
533        assert!(matches!(
534            result.errors[0].error_type,
535            ValidationErrorType::MissingRequired
536        ));
537    }
538
539    #[test]
540    fn test_null_required_field() {
541        let schema = vec![make_field("name", "STRING", "REQUIRED")];
542
543        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
544        let mut result = ValidationResult::new();
545
546        let record = json!({"name": null});
547        validator.validate_record(&record, 1, &mut result);
548
549        assert!(!result.valid);
550        assert!(matches!(
551            result.errors[0].error_type,
552            ValidationErrorType::MissingRequired
553        ));
554    }
555
556    #[test]
557    fn test_type_mismatch() {
558        let schema = vec![make_field("age", "INTEGER", "NULLABLE")];
559
560        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
561        let mut result = ValidationResult::new();
562
563        let record = json!({"age": "thirty"});
564        validator.validate_record(&record, 1, &mut result);
565
566        assert!(!result.valid);
567        assert!(matches!(
568            result.errors[0].error_type,
569            ValidationErrorType::TypeMismatch { .. }
570        ));
571    }
572
573    #[test]
574    fn test_lenient_type_coercion() {
575        let schema = vec![
576            make_field("age", "INTEGER", "NULLABLE"),
577            make_field("active", "BOOLEAN", "NULLABLE"),
578        ];
579
580        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
581        let mut result = ValidationResult::new();
582
583        // "123" should be valid INTEGER in lenient mode
584        let record = json!({"age": "123", "active": "true"});
585        validator.validate_record(&record, 1, &mut result);
586
587        assert!(result.valid);
588    }
589
590    #[test]
591    fn test_strict_type_checking() {
592        let schema = vec![make_field("age", "INTEGER", "NULLABLE")];
593
594        let options = ValidationOptions {
595            strict_types: true,
596            ..Default::default()
597        };
598        let validator = SchemaValidator::new(&schema, options);
599        let mut result = ValidationResult::new();
600
601        // "123" should fail INTEGER in strict mode
602        let record = json!({"age": "123"});
603        validator.validate_record(&record, 1, &mut result);
604
605        assert!(!result.valid);
606    }
607
608    #[test]
609    fn test_unknown_field_error() {
610        let schema = vec![make_field("name", "STRING", "NULLABLE")];
611
612        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
613        let mut result = ValidationResult::new();
614
615        let record = json!({"name": "John", "unknown": 123});
616        validator.validate_record(&record, 1, &mut result);
617
618        assert!(!result.valid);
619        assert!(matches!(
620            result.errors[0].error_type,
621            ValidationErrorType::UnknownField
622        ));
623    }
624
625    #[test]
626    fn test_unknown_field_allowed() {
627        let schema = vec![make_field("name", "STRING", "NULLABLE")];
628
629        let options = ValidationOptions {
630            allow_unknown: true,
631            ..Default::default()
632        };
633        let validator = SchemaValidator::new(&schema, options);
634        let mut result = ValidationResult::new();
635
636        let record = json!({"name": "John", "unknown": 123});
637        validator.validate_record(&record, 1, &mut result);
638
639        assert!(result.valid);
640        assert_eq!(result.warnings.len(), 1);
641    }
642
643    #[test]
644    fn test_nested_record_validation() {
645        let schema = vec![make_record_field(
646            "user",
647            "NULLABLE",
648            vec![
649                make_field("name", "STRING", "REQUIRED"),
650                make_field("email", "STRING", "NULLABLE"),
651            ],
652        )];
653
654        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
655        let mut result = ValidationResult::new();
656
657        // Missing required nested field
658        let record = json!({"user": {"email": "test@example.com"}});
659        validator.validate_record(&record, 1, &mut result);
660
661        assert!(!result.valid);
662        assert!(result.errors[0].path.contains("user.name"));
663    }
664
665    #[test]
666    fn test_repeated_field_validation() {
667        let schema = vec![make_field("tags", "STRING", "REPEATED")];
668
669        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
670        let mut result = ValidationResult::new();
671
672        // Valid array
673        let record = json!({"tags": ["a", "b", "c"]});
674        validator.validate_record(&record, 1, &mut result);
675        assert!(result.valid);
676
677        // Non-array should fail
678        let mut result2 = ValidationResult::new();
679        let record2 = json!({"tags": "not-an-array"});
680        validator.validate_record(&record2, 1, &mut result2);
681        assert!(!result2.valid);
682    }
683
684    #[test]
685    fn test_max_errors_limit() {
686        let schema = vec![
687            make_field("a", "INTEGER", "NULLABLE"),
688            make_field("b", "INTEGER", "NULLABLE"),
689            make_field("c", "INTEGER", "NULLABLE"),
690        ];
691
692        let options = ValidationOptions {
693            max_errors: 2,
694            ..Default::default()
695        };
696        let validator = SchemaValidator::new(&schema, options);
697        let mut result = ValidationResult::new();
698
699        let record = json!({"a": "x", "b": "y", "c": "z"});
700        validator.validate_record(&record, 1, &mut result);
701
702        // Should stop after 2 errors
703        assert_eq!(result.error_count, 2);
704    }
705
706    #[test]
707    fn test_date_time_validation() {
708        let schema = vec![
709            make_field("date_field", "DATE", "NULLABLE"),
710            make_field("time_field", "TIME", "NULLABLE"),
711            make_field("timestamp_field", "TIMESTAMP", "NULLABLE"),
712        ];
713
714        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
715        let mut result = ValidationResult::new();
716
717        let record = json!({
718            "date_field": "2024-01-15",
719            "time_field": "12:30:45",
720            "timestamp_field": "2024-01-15T12:30:45"
721        });
722        validator.validate_record(&record, 1, &mut result);
723
724        assert!(result.valid);
725    }
726
727    #[test]
728    fn test_invalid_date_format() {
729        let schema = vec![make_field("date_field", "DATE", "NULLABLE")];
730
731        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
732        let mut result = ValidationResult::new();
733
734        let record = json!({"date_field": "01-15-2024"}); // Wrong format
735        validator.validate_record(&record, 1, &mut result);
736
737        assert!(!result.valid);
738    }
739
740    #[test]
741    fn test_empty_record_validation() {
742        // Schema with only NULLABLE fields
743        let schema = vec![
744            make_field("optional1", "STRING", "NULLABLE"),
745            make_field("optional2", "INTEGER", "NULLABLE"),
746        ];
747
748        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
749        let mut result = ValidationResult::new();
750
751        // Empty record should pass if all fields are NULLABLE
752        let record = json!({});
753        validator.validate_record(&record, 1, &mut result);
754
755        assert!(result.valid);
756        assert_eq!(result.error_count, 0);
757    }
758
759    #[test]
760    fn test_deeply_nested_validation_5_levels() {
761        let schema = vec![make_record_field(
762            "l1",
763            "NULLABLE",
764            vec![make_record_field(
765                "l2",
766                "NULLABLE",
767                vec![make_record_field(
768                    "l3",
769                    "NULLABLE",
770                    vec![make_record_field(
771                        "l4",
772                        "NULLABLE",
773                        vec![make_field("l5", "STRING", "REQUIRED")],
774                    )],
775                )],
776            )],
777        )];
778
779        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
780
781        // Valid deeply nested data
782        let mut result1 = ValidationResult::new();
783        let valid_record = json!({"l1": {"l2": {"l3": {"l4": {"l5": "value"}}}}});
784        validator.validate_record(&valid_record, 1, &mut result1);
785        assert!(result1.valid);
786
787        // Missing required field at depth 5
788        let mut result2 = ValidationResult::new();
789        let invalid_record = json!({"l1": {"l2": {"l3": {"l4": {}}}}});
790        validator.validate_record(&invalid_record, 1, &mut result2);
791        assert!(!result2.valid);
792        assert!(result2.errors[0].path.contains("l1.l2.l3.l4.l5"));
793    }
794
795    #[test]
796    fn test_float_integer_boundary_validation() {
797        let schema = vec![make_field("big_num", "INTEGER", "NULLABLE")];
798
799        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
800
801        // i64::MAX should be valid
802        let mut result1 = ValidationResult::new();
803        let record1 = json!({"big_num": 9223372036854775807_i64});
804        validator.validate_record(&record1, 1, &mut result1);
805        assert!(result1.valid);
806
807        // Float should not be valid for INTEGER
808        let mut result2 = ValidationResult::new();
809        let record2 = json!({"big_num": 3.5});
810        validator.validate_record(&record2, 1, &mut result2);
811        assert!(!result2.valid);
812    }
813
814    #[test]
815    fn test_timestamp_unix_epoch_numeric() {
816        let schema = vec![make_field("ts", "TIMESTAMP", "NULLABLE")];
817
818        // Lenient mode - numeric timestamps allowed
819        let lenient_validator = SchemaValidator::new(&schema, ValidationOptions::default());
820        let mut result1 = ValidationResult::new();
821        let record = json!({"ts": 1609459200});
822        lenient_validator.validate_record(&record, 1, &mut result1);
823        assert!(
824            result1.valid,
825            "Numeric timestamp should be valid in lenient mode"
826        );
827
828        // Strict mode - only string timestamps
829        let strict_options = ValidationOptions {
830            strict_types: true,
831            ..Default::default()
832        };
833        let strict_validator = SchemaValidator::new(&schema, strict_options);
834        let mut result2 = ValidationResult::new();
835        strict_validator.validate_record(&record, 1, &mut result2);
836        assert!(
837            !result2.valid,
838            "Numeric timestamp should be invalid in strict mode"
839        );
840    }
841
842    #[test]
843    fn test_repeated_with_nulls_in_array() {
844        let schema = vec![make_field("values", "INTEGER", "REPEATED")];
845
846        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
847        let mut result = ValidationResult::new();
848
849        // Array with nulls should be valid (nulls are allowed in arrays)
850        let record = json!({"values": [1, null, 2, null, 3]});
851        validator.validate_record(&record, 1, &mut result);
852
853        assert!(result.valid, "Nulls in arrays should be allowed");
854    }
855
856    #[test]
857    fn test_case_insensitive_field_matching() {
858        let schema = vec![make_field("UserName", "STRING", "NULLABLE")];
859
860        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
861        let mut result = ValidationResult::new();
862
863        // Different casing should match
864        let record = json!({"username": "test"});
865        validator.validate_record(&record, 1, &mut result);
866
867        assert!(result.valid, "Field matching should be case-insensitive");
868    }
869
870    #[test]
871    fn test_empty_string_for_required_field() {
872        let schema = vec![make_field("name", "STRING", "REQUIRED")];
873
874        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
875        let mut result = ValidationResult::new();
876
877        // Empty string should count as present (not null)
878        let record = json!({"name": ""});
879        validator.validate_record(&record, 1, &mut result);
880
881        assert!(result.valid, "Empty string should satisfy REQUIRED");
882    }
883
884    #[test]
885    fn test_validation_json_output_structure() {
886        let schema = vec![make_field("id", "INTEGER", "REQUIRED")];
887
888        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
889        let mut result = ValidationResult::new();
890
891        let record = json!({"wrong_field": 42});
892        validator.validate_record(&record, 1, &mut result);
893
894        assert!(!result.valid);
895        assert_eq!(result.error_count, result.errors.len());
896
897        // Check error structure
898        let error = &result.errors[0];
899        assert!(error.line > 0);
900        assert!(!error.path.is_empty() || error.path == "id");
901        assert!(!error.message.is_empty());
902    }
903
904    #[test]
905    fn test_multiple_errors_in_single_record() {
906        let schema = vec![
907            make_field("a", "INTEGER", "REQUIRED"),
908            make_field("b", "INTEGER", "REQUIRED"),
909            make_field("c", "INTEGER", "REQUIRED"),
910        ];
911
912        let options = ValidationOptions {
913            max_errors: 100,
914            ..Default::default()
915        };
916        let validator = SchemaValidator::new(&schema, options);
917        let mut result = ValidationResult::new();
918
919        // Record missing all required fields
920        let record = json!({});
921        validator.validate_record(&record, 1, &mut result);
922
923        assert!(!result.valid);
924        assert_eq!(
925            result.error_count, 3,
926            "Should report all missing required fields"
927        );
928    }
929
930    #[test]
931    fn test_nested_unknown_field() {
932        let schema = vec![make_record_field(
933            "user",
934            "NULLABLE",
935            vec![make_field("name", "STRING", "NULLABLE")],
936        )];
937
938        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
939        let mut result = ValidationResult::new();
940
941        let record = json!({"user": {"name": "test", "unknown_nested": 123}});
942        validator.validate_record(&record, 1, &mut result);
943
944        assert!(!result.valid);
945        assert!(result.errors[0].path.contains("user.unknown_nested"));
946    }
947
948    #[test]
949    fn test_array_of_records_validation() {
950        let schema = vec![make_record_field(
951            "items",
952            "REPEATED",
953            vec![
954                make_field("id", "INTEGER", "REQUIRED"),
955                make_field("name", "STRING", "NULLABLE"),
956            ],
957        )];
958
959        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
960
961        // Valid array of records
962        let mut result1 = ValidationResult::new();
963        let valid_record = json!({
964            "items": [
965                {"id": 1, "name": "first"},
966                {"id": 2, "name": "second"}
967            ]
968        });
969        validator.validate_record(&valid_record, 1, &mut result1);
970        assert!(result1.valid);
971
972        // Invalid: missing required field in one element
973        let mut result2 = ValidationResult::new();
974        let invalid_record = json!({
975            "items": [
976                {"id": 1, "name": "first"},
977                {"name": "second"}  // missing id
978            ]
979        });
980        validator.validate_record(&invalid_record, 1, &mut result2);
981        assert!(!result2.valid);
982        assert!(result2.errors[0].path.contains("[1]"));
983    }
984
985    #[test]
986    fn test_validate_integer_i64_min() {
987        let schema = vec![make_field("min_int", "INTEGER", "NULLABLE")];
988        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
989
990        // i64::MIN should be valid
991        let mut result = ValidationResult::new();
992        let record = json!({"min_int": -9223372036854775808_i64});
993        validator.validate_record(&record, 1, &mut result);
994        assert!(result.valid);
995    }
996
997    #[test]
998    fn test_validate_integer_zero() {
999        let schema = vec![make_field("zero_int", "INTEGER", "NULLABLE")];
1000        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1001
1002        let mut result = ValidationResult::new();
1003        let record = json!({"zero_int": 0});
1004        validator.validate_record(&record, 1, &mut result);
1005        assert!(result.valid);
1006    }
1007
1008    #[test]
1009    fn test_validate_integer_negative() {
1010        let schema = vec![make_field("neg_int", "INTEGER", "NULLABLE")];
1011        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1012
1013        let mut result = ValidationResult::new();
1014        let record = json!({"neg_int": -42});
1015        validator.validate_record(&record, 1, &mut result);
1016        assert!(result.valid);
1017    }
1018
1019    #[test]
1020    fn test_validate_timestamp_with_timezone() {
1021        let schema = vec![make_field("ts", "TIMESTAMP", "NULLABLE")];
1022        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1023
1024        // ISO 8601 with timezone offset
1025        let mut result1 = ValidationResult::new();
1026        let record1 = json!({"ts": "2024-01-15T12:30:45+05:30"});
1027        validator.validate_record(&record1, 1, &mut result1);
1028        assert!(result1.valid, "Timestamp with timezone should be valid");
1029
1030        // ISO 8601 with Z suffix (UTC)
1031        let mut result2 = ValidationResult::new();
1032        let record2 = json!({"ts": "2024-01-15T12:30:45Z"});
1033        validator.validate_record(&record2, 1, &mut result2);
1034        assert!(result2.valid, "Timestamp with Z suffix should be valid");
1035
1036        // Negative timezone offset
1037        let mut result3 = ValidationResult::new();
1038        let record3 = json!({"ts": "2024-01-15T12:30:45-08:00"});
1039        validator.validate_record(&record3, 1, &mut result3);
1040        assert!(
1041            result3.valid,
1042            "Timestamp with negative timezone should be valid"
1043        );
1044    }
1045
1046    #[test]
1047    fn test_validate_timestamp_milliseconds() {
1048        let schema = vec![make_field("ts", "TIMESTAMP", "NULLABLE")];
1049        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1050
1051        // Timestamp with milliseconds
1052        let mut result1 = ValidationResult::new();
1053        let record1 = json!({"ts": "2024-01-15T12:30:45.123"});
1054        validator.validate_record(&record1, 1, &mut result1);
1055        assert!(result1.valid, "Timestamp with milliseconds should be valid");
1056
1057        // Timestamp with microseconds
1058        let mut result2 = ValidationResult::new();
1059        let record2 = json!({"ts": "2024-01-15T12:30:45.123456"});
1060        validator.validate_record(&record2, 1, &mut result2);
1061        assert!(result2.valid, "Timestamp with microseconds should be valid");
1062    }
1063
1064    #[test]
1065    fn test_validate_timestamp_space_separator() {
1066        let schema = vec![make_field("ts", "TIMESTAMP", "NULLABLE")];
1067        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1068
1069        // Space instead of T separator (common in some systems)
1070        let mut result = ValidationResult::new();
1071        let record = json!({"ts": "2024-01-15 12:30:45"});
1072        validator.validate_record(&record, 1, &mut result);
1073        // This may or may not be valid depending on timestamp parsing
1074        // The test documents current behavior
1075    }
1076
1077    #[test]
1078    fn test_validate_deeply_nested_record_10_levels() {
1079        // Create deeply nested schema - 10 levels deep
1080        fn make_nested_schema(depth: usize, leaf_name: &str) -> BqSchemaField {
1081            if depth == 0 {
1082                make_field(leaf_name, "STRING", "REQUIRED")
1083            } else {
1084                make_record_field(
1085                    &format!("level{}", depth),
1086                    "NULLABLE",
1087                    vec![make_nested_schema(depth - 1, leaf_name)],
1088                )
1089            }
1090        }
1091
1092        let schema = vec![make_nested_schema(10, "deep_value")];
1093        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1094
1095        // Build nested JSON - 10 levels deep
1096        fn make_nested_json(depth: usize) -> serde_json::Value {
1097            if depth == 0 {
1098                json!({"deep_value": "found it!"})
1099            } else {
1100                let inner = make_nested_json(depth - 1);
1101                json!({format!("level{}", depth): inner})
1102            }
1103        }
1104
1105        let mut result = ValidationResult::new();
1106        let record = make_nested_json(10);
1107        validator.validate_record(&record, 1, &mut result);
1108        assert!(result.valid, "10-level deep nesting should be valid");
1109    }
1110
1111    #[test]
1112    fn test_validate_float_special_values() {
1113        let schema = vec![make_field("num", "FLOAT", "NULLABLE")];
1114        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1115
1116        // Very small float
1117        let mut result1 = ValidationResult::new();
1118        let record1 = json!({"num": 0.000000001});
1119        validator.validate_record(&record1, 1, &mut result1);
1120        assert!(result1.valid);
1121
1122        // Very large float
1123        let mut result2 = ValidationResult::new();
1124        let record2 = json!({"num": 1.7976931348623157e308});
1125        validator.validate_record(&record2, 1, &mut result2);
1126        assert!(result2.valid);
1127
1128        // Negative float
1129        let mut result3 = ValidationResult::new();
1130        let record3 = json!({"num": -123.456});
1131        validator.validate_record(&record3, 1, &mut result3);
1132        assert!(result3.valid);
1133    }
1134
1135    #[test]
1136    fn test_validate_date_edge_cases() {
1137        let schema = vec![make_field("d", "DATE", "NULLABLE")];
1138        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1139
1140        // Leap year date
1141        let mut result1 = ValidationResult::new();
1142        let record1 = json!({"d": "2024-02-29"});
1143        validator.validate_record(&record1, 1, &mut result1);
1144        assert!(result1.valid, "Leap year date should be valid");
1145
1146        // End of year
1147        let mut result2 = ValidationResult::new();
1148        let record2 = json!({"d": "2024-12-31"});
1149        validator.validate_record(&record2, 1, &mut result2);
1150        assert!(result2.valid);
1151
1152        // Beginning of year
1153        let mut result3 = ValidationResult::new();
1154        let record3 = json!({"d": "2024-01-01"});
1155        validator.validate_record(&record3, 1, &mut result3);
1156        assert!(result3.valid);
1157    }
1158
1159    #[test]
1160    fn test_validate_time_edge_cases() {
1161        let schema = vec![make_field("t", "TIME", "NULLABLE")];
1162        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1163
1164        // Midnight
1165        let mut result1 = ValidationResult::new();
1166        let record1 = json!({"t": "00:00:00"});
1167        validator.validate_record(&record1, 1, &mut result1);
1168        assert!(result1.valid, "Midnight should be valid");
1169
1170        // Last second of day
1171        let mut result2 = ValidationResult::new();
1172        let record2 = json!({"t": "23:59:59"});
1173        validator.validate_record(&record2, 1, &mut result2);
1174        assert!(result2.valid, "23:59:59 should be valid");
1175
1176        // Noon
1177        let mut result3 = ValidationResult::new();
1178        let record3 = json!({"t": "12:00:00"});
1179        validator.validate_record(&record3, 1, &mut result3);
1180        assert!(result3.valid, "Noon should be valid");
1181    }
1182
1183    #[test]
1184    fn test_validate_mixed_types_in_record() {
1185        let schema = vec![
1186            make_field("str", "STRING", "NULLABLE"),
1187            make_field("int", "INTEGER", "NULLABLE"),
1188            make_field("float", "FLOAT", "NULLABLE"),
1189            make_field("bool", "BOOLEAN", "NULLABLE"),
1190            make_field("date", "DATE", "NULLABLE"),
1191            make_field("time", "TIME", "NULLABLE"),
1192            make_field("ts", "TIMESTAMP", "NULLABLE"),
1193        ];
1194
1195        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1196        let mut result = ValidationResult::new();
1197
1198        let record = json!({
1199            "str": "hello",
1200            "int": 42,
1201            "float": 2.71,
1202            "bool": true,
1203            "date": "2024-01-15",
1204            "time": "12:30:00",
1205            "ts": "2024-01-15T12:30:00"
1206        });
1207        validator.validate_record(&record, 1, &mut result);
1208
1209        assert!(result.valid, "Record with all valid types should pass");
1210    }
1211
1212    #[test]
1213    fn test_validate_string_coercion_edge_cases() {
1214        let schema = vec![make_field("str", "STRING", "NULLABLE")];
1215        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1216
1217        // Number can be coerced to string
1218        let mut result1 = ValidationResult::new();
1219        let record1 = json!({"str": 12345});
1220        validator.validate_record(&record1, 1, &mut result1);
1221        assert!(result1.valid, "Number should coerce to string");
1222
1223        // Boolean can be coerced to string
1224        let mut result2 = ValidationResult::new();
1225        let record2 = json!({"str": true});
1226        validator.validate_record(&record2, 1, &mut result2);
1227        assert!(result2.valid, "Boolean should coerce to string");
1228
1229        // Array should NOT coerce to string
1230        let mut result3 = ValidationResult::new();
1231        let record3 = json!({"str": [1, 2, 3]});
1232        validator.validate_record(&record3, 1, &mut result3);
1233        assert!(!result3.valid, "Array should not coerce to string");
1234
1235        // Object should NOT coerce to string
1236        let mut result4 = ValidationResult::new();
1237        let record4 = json!({"str": {"nested": "object"}});
1238        validator.validate_record(&record4, 1, &mut result4);
1239        assert!(!result4.valid, "Object should not coerce to string");
1240    }
1241
1242    #[test]
1243    fn test_validate_repeated_records_nested() {
1244        let schema = vec![make_record_field(
1245            "orders",
1246            "REPEATED",
1247            vec![
1248                make_field("id", "INTEGER", "REQUIRED"),
1249                make_record_field(
1250                    "items",
1251                    "REPEATED",
1252                    vec![
1253                        make_field("product_id", "INTEGER", "REQUIRED"),
1254                        make_field("quantity", "INTEGER", "NULLABLE"),
1255                    ],
1256                ),
1257            ],
1258        )];
1259
1260        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1261
1262        // Valid nested repeated records
1263        let mut result = ValidationResult::new();
1264        let record = json!({
1265            "orders": [
1266                {
1267                    "id": 1,
1268                    "items": [
1269                        {"product_id": 100, "quantity": 2},
1270                        {"product_id": 101, "quantity": 1}
1271                    ]
1272                },
1273                {
1274                    "id": 2,
1275                    "items": [
1276                        {"product_id": 200, "quantity": 5}
1277                    ]
1278                }
1279            ]
1280        });
1281        validator.validate_record(&record, 1, &mut result);
1282        assert!(result.valid, "Nested repeated records should be valid");
1283    }
1284
1285    #[test]
1286    fn test_validate_empty_array() {
1287        let schema = vec![make_field("tags", "STRING", "REPEATED")];
1288        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1289
1290        let mut result = ValidationResult::new();
1291        let record = json!({"tags": []});
1292        validator.validate_record(&record, 1, &mut result);
1293        assert!(
1294            result.valid,
1295            "Empty array should be valid for REPEATED field"
1296        );
1297    }
1298
1299    #[test]
1300    fn test_validate_json_data_function() {
1301        let schema = vec![
1302            make_field("name", "STRING", "REQUIRED"),
1303            make_field("age", "INTEGER", "NULLABLE"),
1304        ];
1305
1306        let input = r#"{"name": "Alice", "age": 30}
1307{"name": "Bob", "age": 25}
1308{"name": "Charlie"}"#;
1309
1310        let result = validate_json_data(
1311            std::io::Cursor::new(input),
1312            &schema,
1313            ValidationOptions::default(),
1314        )
1315        .unwrap();
1316
1317        assert!(result.valid);
1318        assert_eq!(result.error_count, 0);
1319    }
1320
1321    #[test]
1322    fn test_validate_json_data_with_errors() {
1323        let schema = vec![
1324            make_field("name", "STRING", "REQUIRED"),
1325            make_field("age", "INTEGER", "NULLABLE"),
1326        ];
1327
1328        let input = r#"{"age": 30}
1329{"name": "Bob", "age": "not a number"}
1330{"name": "Charlie"}"#;
1331
1332        let result = validate_json_data(
1333            std::io::Cursor::new(input),
1334            &schema,
1335            ValidationOptions::default(),
1336        )
1337        .unwrap();
1338
1339        assert!(!result.valid);
1340        // Should have errors for missing required field and type mismatch
1341        assert!(result.error_count >= 2);
1342    }
1343
1344    #[test]
1345    fn test_truncate_value_function() {
1346        // Test through validation error message
1347        let schema = vec![make_field("data", "INTEGER", "NULLABLE")];
1348        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1349
1350        // Create a very long string value
1351        let long_string = "x".repeat(100);
1352        let mut result = ValidationResult::new();
1353        let record = json!({"data": long_string});
1354        validator.validate_record(&record, 1, &mut result);
1355
1356        assert!(!result.valid);
1357        // Error message should have truncated value
1358        assert!(result.errors[0].message.len() < 200);
1359    }
1360
1361    #[test]
1362    fn test_validation_options_defaults() {
1363        let options = ValidationOptions::default();
1364
1365        assert!(!options.allow_unknown);
1366        assert!(!options.strict_types);
1367        assert_eq!(options.max_errors, 100);
1368    }
1369
1370    #[test]
1371    fn test_validate_record_not_object() {
1372        let schema = vec![make_field("name", "STRING", "NULLABLE")];
1373        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1374
1375        // Array at root
1376        let mut result1 = ValidationResult::new();
1377        let record1 = json!([1, 2, 3]);
1378        validator.validate_record(&record1, 1, &mut result1);
1379        assert!(!result1.valid);
1380
1381        // String at root
1382        let mut result2 = ValidationResult::new();
1383        let record2 = json!("just a string");
1384        validator.validate_record(&record2, 1, &mut result2);
1385        assert!(!result2.valid);
1386
1387        // Number at root
1388        let mut result3 = ValidationResult::new();
1389        let record3 = json!(42);
1390        validator.validate_record(&record3, 1, &mut result3);
1391        assert!(!result3.valid);
1392    }
1393
1394    #[test]
1395    fn test_json_type_name_function() {
1396        // Test through error messages
1397        let schema = vec![make_field("field", "RECORD", "NULLABLE")];
1398        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1399
1400        let mut result = ValidationResult::new();
1401        let record = json!({"field": "string value"});
1402        validator.validate_record(&record, 1, &mut result);
1403
1404        assert!(!result.valid);
1405        // Error should mention expected and actual types
1406        assert!(result.errors[0].message.contains("STRING"));
1407        assert!(result.errors[0].message.contains("RECORD"));
1408    }
1409
1410    // ===== Additional Coverage Tests =====
1411
1412    #[test]
1413    fn test_validate_nested_record_multiple_levels_with_type_errors() {
1414        let schema = vec![make_record_field(
1415            "outer",
1416            "NULLABLE",
1417            vec![make_record_field(
1418                "middle",
1419                "NULLABLE",
1420                vec![make_record_field(
1421                    "inner",
1422                    "NULLABLE",
1423                    vec![
1424                        make_field("deep_int", "INTEGER", "REQUIRED"),
1425                        make_field("deep_str", "STRING", "NULLABLE"),
1426                    ],
1427                )],
1428            )],
1429        )];
1430
1431        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1432
1433        // Test with type mismatch at deepest level
1434        let mut result = ValidationResult::new();
1435        let record = json!({
1436            "outer": {
1437                "middle": {
1438                    "inner": {
1439                        "deep_int": "not an int",
1440                        "deep_str": "valid"
1441                    }
1442                }
1443            }
1444        });
1445        validator.validate_record(&record, 1, &mut result);
1446        assert!(!result.valid);
1447        assert!(result.errors[0]
1448            .path
1449            .contains("outer.middle.inner.deep_int"));
1450    }
1451
1452    #[test]
1453    fn test_validate_repeated_field_with_objects_mixed_types() {
1454        let schema = vec![make_record_field(
1455            "items",
1456            "REPEATED",
1457            vec![
1458                make_field("id", "INTEGER", "NULLABLE"),
1459                make_field("name", "STRING", "NULLABLE"),
1460            ],
1461        )];
1462
1463        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1464
1465        // Array with objects where one has wrong type
1466        let mut result = ValidationResult::new();
1467        let record = json!({
1468            "items": [
1469                {"id": 1, "name": "first"},
1470                {"id": "not-an-int", "name": "second"},
1471                {"id": 3, "name": "third"}
1472            ]
1473        });
1474        validator.validate_record(&record, 1, &mut result);
1475        assert!(!result.valid);
1476        assert!(result.errors[0].path.contains("[1]"));
1477    }
1478
1479    #[test]
1480    fn test_validate_repeated_primitives_with_type_errors() {
1481        let schema = vec![make_field("numbers", "INTEGER", "REPEATED")];
1482
1483        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1484
1485        // Array with mixed types
1486        let mut result = ValidationResult::new();
1487        let record = json!({
1488            "numbers": [1, 2, "three", 4, "five"]
1489        });
1490        validator.validate_record(&record, 1, &mut result);
1491        assert!(!result.valid);
1492        assert!(result.error_count >= 2);
1493    }
1494
1495    #[test]
1496    fn test_validate_all_datetime_formats_comprehensive() {
1497        let schema = vec![
1498            make_field("date", "DATE", "NULLABLE"),
1499            make_field("time", "TIME", "NULLABLE"),
1500            make_field("timestamp", "TIMESTAMP", "NULLABLE"),
1501        ];
1502
1503        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1504
1505        // Valid formats
1506        let valid_cases = vec![
1507            json!({"date": "2024-01-15", "time": "12:30:45", "timestamp": "2024-01-15T12:30:45"}),
1508            json!({"date": "2024-12-31", "time": "23:59:59", "timestamp": "2024-01-15T12:30:45Z"}),
1509            json!({"date": "2024-2-1", "time": "1:2:3", "timestamp": "2024-01-15T12:30:45.123456"}),
1510        ];
1511
1512        for case in valid_cases {
1513            let mut result = ValidationResult::new();
1514            validator.validate_record(&case, 1, &mut result);
1515            assert!(result.valid, "Expected valid: {:?}", case);
1516        }
1517
1518        // Invalid formats - completely wrong formats
1519        let invalid_cases = vec![
1520            ("date", json!({"date": "not-a-date"})),
1521            ("timestamp", json!({"timestamp": "not-a-timestamp"})),
1522        ];
1523
1524        for (field, case) in invalid_cases {
1525            let mut result = ValidationResult::new();
1526            validator.validate_record(&case, 1, &mut result);
1527            assert!(!result.valid, "Expected invalid for {}: {:?}", field, case);
1528        }
1529    }
1530
1531    #[test]
1532    fn test_validate_null_handling_in_arrays() {
1533        let schema = vec![
1534            make_field("int_array", "INTEGER", "REPEATED"),
1535            make_field("str_array", "STRING", "REPEATED"),
1536            make_record_field(
1537                "record_array",
1538                "REPEATED",
1539                vec![make_field("id", "INTEGER", "NULLABLE")],
1540            ),
1541        ];
1542
1543        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1544
1545        // All arrays with nulls should be valid
1546        let mut result = ValidationResult::new();
1547        let record = json!({
1548            "int_array": [1, null, 2, null],
1549            "str_array": ["a", null, "b"],
1550            "record_array": [{"id": 1}, null, {"id": 2}]
1551        });
1552        validator.validate_record(&record, 1, &mut result);
1553        assert!(result.valid, "Nulls in arrays should be valid");
1554    }
1555
1556    #[test]
1557    fn test_validate_type_coercion_failures_strict_mode() {
1558        let schema = vec![
1559            make_field("int_field", "INTEGER", "NULLABLE"),
1560            make_field("float_field", "FLOAT", "NULLABLE"),
1561            make_field("bool_field", "BOOLEAN", "NULLABLE"),
1562        ];
1563
1564        let strict_options = ValidationOptions {
1565            strict_types: true,
1566            ..Default::default()
1567        };
1568        let validator = SchemaValidator::new(&schema, strict_options);
1569
1570        // String representations should fail in strict mode
1571        let mut result = ValidationResult::new();
1572        let record = json!({
1573            "int_field": "123",
1574            "float_field": "3.14",
1575            "bool_field": "true"
1576        });
1577        validator.validate_record(&record, 1, &mut result);
1578        assert!(!result.valid);
1579        assert_eq!(result.error_count, 3);
1580    }
1581
1582    #[test]
1583    fn test_validate_type_coercion_success_lenient_mode() {
1584        let schema = vec![
1585            make_field("int_field", "INTEGER", "NULLABLE"),
1586            make_field("float_field", "FLOAT", "NULLABLE"),
1587            make_field("bool_field", "BOOLEAN", "NULLABLE"),
1588            make_field("ts_field", "TIMESTAMP", "NULLABLE"),
1589        ];
1590
1591        let lenient_options = ValidationOptions {
1592            strict_types: false,
1593            ..Default::default()
1594        };
1595        let validator = SchemaValidator::new(&schema, lenient_options);
1596
1597        // String representations should pass in lenient mode
1598        let mut result = ValidationResult::new();
1599        let record = json!({
1600            "int_field": "123",
1601            "float_field": "3.14",
1602            "bool_field": "true",
1603            "ts_field": 1609459200  // Unix timestamp
1604        });
1605        validator.validate_record(&record, 1, &mut result);
1606        assert!(result.valid, "String coercion should work in lenient mode");
1607    }
1608
1609    #[test]
1610    fn test_validate_repeated_vs_non_repeated_mode_conflict() {
1611        let schema = vec![make_field("tags", "STRING", "REPEATED")];
1612
1613        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1614
1615        // Non-array value for REPEATED field should fail
1616        let mut result = ValidationResult::new();
1617        let record = json!({"tags": "single-value"});
1618        validator.validate_record(&record, 1, &mut result);
1619        assert!(!result.valid);
1620        assert!(result.errors[0].message.contains("ARRAY"));
1621    }
1622
1623    #[test]
1624    fn test_validate_u64_max_value_for_integer() {
1625        let schema = vec![make_field("big_int", "INTEGER", "NULLABLE")];
1626
1627        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1628
1629        // u64::MAX as number
1630        let mut result = ValidationResult::new();
1631        let record = json!({"big_int": 18446744073709551615_u64});
1632        validator.validate_record(&record, 1, &mut result);
1633        // u64 should be valid as it can be represented as u64
1634        assert!(result.valid);
1635    }
1636
1637    #[test]
1638    fn test_validate_nested_record_with_unknown_fields() {
1639        let schema = vec![make_record_field(
1640            "user",
1641            "NULLABLE",
1642            vec![
1643                make_field("id", "INTEGER", "NULLABLE"),
1644                make_record_field(
1645                    "profile",
1646                    "NULLABLE",
1647                    vec![make_field("name", "STRING", "NULLABLE")],
1648                ),
1649            ],
1650        )];
1651
1652        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1653
1654        // Unknown fields at different nesting levels
1655        let mut result = ValidationResult::new();
1656        let record = json!({
1657            "user": {
1658                "id": 1,
1659                "unknown_top": "should fail",
1660                "profile": {
1661                    "name": "test",
1662                    "unknown_nested": "should also fail"
1663                }
1664            }
1665        });
1666        validator.validate_record(&record, 1, &mut result);
1667        assert!(!result.valid);
1668        assert!(result.error_count >= 2);
1669    }
1670
1671    #[test]
1672    fn test_validate_max_errors_stops_early() {
1673        let schema = vec![
1674            make_field("a", "INTEGER", "NULLABLE"),
1675            make_field("b", "INTEGER", "NULLABLE"),
1676            make_field("c", "INTEGER", "NULLABLE"),
1677            make_field("d", "INTEGER", "NULLABLE"),
1678            make_field("e", "INTEGER", "NULLABLE"),
1679        ];
1680
1681        let options = ValidationOptions {
1682            max_errors: 2,
1683            ..Default::default()
1684        };
1685        let validator = SchemaValidator::new(&schema, options);
1686
1687        // All fields have wrong types
1688        let mut result = ValidationResult::new();
1689        let record = json!({
1690            "a": "x",
1691            "b": "y",
1692            "c": "z",
1693            "d": "w",
1694            "e": "v"
1695        });
1696        validator.validate_record(&record, 1, &mut result);
1697
1698        // Should stop at max_errors
1699        assert_eq!(result.error_count, 2);
1700    }
1701
1702    #[test]
1703    fn test_validate_record_with_non_object_nested() {
1704        let schema = vec![make_record_field(
1705            "data",
1706            "NULLABLE",
1707            vec![make_field("value", "STRING", "NULLABLE")],
1708        )];
1709
1710        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1711
1712        // Array where RECORD is expected
1713        let mut result1 = ValidationResult::new();
1714        let record1 = json!({"data": [1, 2, 3]});
1715        validator.validate_record(&record1, 1, &mut result1);
1716        assert!(!result1.valid);
1717        assert!(result1.errors[0].message.contains("RECORD"));
1718
1719        // String where RECORD is expected
1720        let mut result2 = ValidationResult::new();
1721        let record2 = json!({"data": "not an object"});
1722        validator.validate_record(&record2, 1, &mut result2);
1723        assert!(!result2.valid);
1724
1725        // Number where RECORD is expected
1726        let mut result3 = ValidationResult::new();
1727        let record3 = json!({"data": 123});
1728        validator.validate_record(&record3, 1, &mut result3);
1729        assert!(!result3.valid);
1730    }
1731
1732    #[test]
1733    fn test_validate_boolean_string_variations() {
1734        let schema = vec![make_field("flag", "BOOLEAN", "NULLABLE")];
1735
1736        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1737
1738        // Various boolean string representations
1739        let valid_values = vec!["true", "false", "True", "False", "TRUE", "FALSE"];
1740        for val in valid_values {
1741            let mut result = ValidationResult::new();
1742            let record = json!({"flag": val});
1743            validator.validate_record(&record, 1, &mut result);
1744            assert!(result.valid, "Boolean string '{}' should be valid", val);
1745        }
1746
1747        // Invalid boolean strings
1748        let invalid_values = vec!["yes", "no", "1", "0", "on", "off"];
1749        for val in invalid_values {
1750            let mut result = ValidationResult::new();
1751            let record = json!({"flag": val});
1752            validator.validate_record(&record, 1, &mut result);
1753            assert!(!result.valid, "Boolean string '{}' should be invalid", val);
1754        }
1755    }
1756
1757    #[test]
1758    fn test_validate_float_accepts_integer() {
1759        let schema = vec![make_field("amount", "FLOAT", "NULLABLE")];
1760
1761        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1762
1763        // Integer should be valid for FLOAT field
1764        let mut result = ValidationResult::new();
1765        let record = json!({"amount": 42});
1766        validator.validate_record(&record, 1, &mut result);
1767        assert!(result.valid, "Integer should be valid for FLOAT field");
1768    }
1769
1770    #[test]
1771    fn test_validate_integer_string_with_leading_zeros() {
1772        let schema = vec![make_field("code", "INTEGER", "NULLABLE")];
1773
1774        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1775
1776        // String integers with various formats
1777        let mut result = ValidationResult::new();
1778        let record = json!({"code": "00123"});
1779        validator.validate_record(&record, 1, &mut result);
1780        assert!(
1781            result.valid,
1782            "Integer string with leading zeros should be valid"
1783        );
1784    }
1785
1786    #[test]
1787    fn test_validate_float_string_scientific_notation() {
1788        let schema = vec![make_field("value", "FLOAT", "NULLABLE")];
1789
1790        let validator = SchemaValidator::new(&schema, ValidationOptions::default());
1791
1792        // Scientific notation strings
1793        let valid_floats = vec!["1e10", "1.5e-3", "3.14E+2", "-1.5e10"];
1794        for val in valid_floats {
1795            let mut result = ValidationResult::new();
1796            let record = json!({"value": val});
1797            validator.validate_record(&record, 1, &mut result);
1798            assert!(result.valid, "Float string '{}' should be valid", val);
1799        }
1800    }
1801
1802    #[test]
1803    fn test_validation_result_reached_max_errors() {
1804        let mut result = ValidationResult::new();
1805        assert!(!result.reached_max_errors(10));
1806
1807        for i in 0..10 {
1808            result.add_error(ValidationError::missing_required(i, "field"));
1809        }
1810        assert!(result.reached_max_errors(10));
1811        assert!(!result.reached_max_errors(11));
1812    }
1813
1814    #[test]
1815    fn test_validate_json_data_all_valid() {
1816        let schema = vec![make_field("name", "STRING", "REQUIRED")];
1817
1818        let input = r#"{"name": "valid"}
1819{"name": "also valid"}"#;
1820
1821        let result = validate_json_data(
1822            std::io::Cursor::new(input),
1823            &schema,
1824            ValidationOptions::default(),
1825        )
1826        .unwrap();
1827
1828        // All records are valid
1829        assert!(result.valid);
1830        assert_eq!(result.error_count, 0);
1831    }
1832
1833    #[test]
1834    fn test_validate_json_data_missing_required() {
1835        let schema = vec![make_field("name", "STRING", "REQUIRED")];
1836
1837        let input = r#"{"name": "valid"}
1838{"wrong_field": "missing required"}
1839{"name": "also valid"}"#;
1840
1841        let result = validate_json_data(
1842            std::io::Cursor::new(input),
1843            &schema,
1844            ValidationOptions::default(),
1845        )
1846        .unwrap();
1847
1848        // Should have error for missing required field
1849        assert!(!result.valid);
1850        assert!(result.error_count >= 1);
1851    }
1852}