1pub mod diff;
27pub mod error;
28pub mod inference;
29pub mod input;
30pub mod output;
31pub mod schema;
32pub mod validate;
33pub mod watch;
34
35pub use error::{Error, ErrorLog, Result};
37pub use input::{CsvRecordIterator, JsonRecordIterator};
38pub use output::{
39 schema_to_json_string, write_schema_ddl, write_schema_debug_map, write_schema_json,
40 write_schema_json_schema, OutputFormat,
41};
42pub use schema::{
43 bq_schema_to_map, read_existing_schema_from_file, BqMode, BqSchemaField, BqType, EntryStatus,
44 GeneratorConfig, InputFormat, SchemaEntry, SchemaGenerator, SchemaMap,
45};
46pub use validate::{
47 validate_json_data, SchemaValidator, ValidationError, ValidationErrorType, ValidationOptions,
48 ValidationResult,
49};
50pub use watch::{run_watch, WatchConfig, WatchState};
51
52use std::io::{BufRead, Read, Write};
53
54pub fn generate_schema_from_json<R: BufRead, W: Write>(
58 input: R,
59 output: &mut W,
60 config: GeneratorConfig,
61 ignore_invalid_lines: bool,
62 debugging_interval: Option<usize>,
63 existing_schema: Option<SchemaMap>,
64) -> Result<Vec<ErrorLog>> {
65 let mut generator = SchemaGenerator::new(config);
66 let mut schema_map = existing_schema.unwrap_or_default();
67
68 let iter = JsonRecordIterator::new(input, ignore_invalid_lines);
69
70 for result in iter {
71 let (line_num, record) = result?;
72
73 if let Some(interval) = debugging_interval {
74 if line_num % interval == 0 {
75 eprintln!("Processing line {}", line_num);
76 }
77 }
78
79 if let Err(e) = generator.process_record(&record, &mut schema_map) {
80 if !ignore_invalid_lines {
81 return Err(e);
82 }
83 }
84 }
85
86 eprintln!("Processed {} lines", generator.line_number());
87
88 let schema = generator.flatten_schema(&schema_map);
89 write_schema_json(&schema, output)?;
90
91 Ok(generator.error_logs().to_vec())
92}
93
94pub fn generate_schema_from_csv<R: Read, W: Write>(
98 input: R,
99 output: &mut W,
100 config: GeneratorConfig,
101 debugging_interval: Option<usize>,
102 existing_schema: Option<SchemaMap>,
103) -> Result<Vec<ErrorLog>> {
104 let mut generator = SchemaGenerator::new(config);
105 let mut schema_map = existing_schema.unwrap_or_default();
106
107 let iter = CsvRecordIterator::new(input)?;
108
109 for result in iter {
110 let (line_num, record) = result?;
111
112 if let Some(interval) = debugging_interval {
113 if line_num % interval == 0 {
114 eprintln!("Processing line {}", line_num);
115 }
116 }
117
118 generator.process_record(&record, &mut schema_map)?;
119 }
120
121 eprintln!("Processed {} lines", generator.line_number());
122
123 let schema = generator.flatten_schema(&schema_map);
124 write_schema_json(&schema, output)?;
125
126 Ok(generator.error_logs().to_vec())
127}
128
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Two JSON lines with the default configuration: no errors are reported and every
    /// field name present in the input appears, quoted, in the written schema.
    #[test]
    fn test_generate_schema_from_json() {
        let json_lines = r#"{"name": "test", "value": 42}
{"name": "foo", "value": 123, "active": true}"#;
        let mut schema_bytes = Vec::new();

        let error_logs = generate_schema_from_json(
            Cursor::new(json_lines),
            &mut schema_bytes,
            GeneratorConfig::default(),
            false,
            None,
            None,
        )
        .unwrap();
        assert!(error_logs.is_empty());

        let schema_text = String::from_utf8(schema_bytes).unwrap();
        for quoted_field in ["\"name\"", "\"value\"", "\"active\""].iter() {
            assert!(schema_text.contains(*quoted_field));
        }
    }

    /// A header row plus two CSV data rows with the CSV input format selected: no errors
    /// are reported and every column name appears, quoted, in the written schema.
    #[test]
    fn test_generate_schema_from_csv() {
        let csv_text = "name,value,active\ntest,42,true\nfoo,123,false";
        let mut schema_bytes = Vec::new();

        let csv_config = GeneratorConfig {
            input_format: InputFormat::Csv,
            ..Default::default()
        };

        let error_logs = generate_schema_from_csv(
            Cursor::new(csv_text),
            &mut schema_bytes,
            csv_config,
            None,
            None,
        )
        .unwrap();
        assert!(error_logs.is_empty());

        let schema_text = String::from_utf8(schema_bytes).unwrap();
        for quoted_field in ["\"name\"", "\"value\"", "\"active\""].iter() {
            assert!(schema_text.contains(*quoted_field));
        }
    }
}