1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3use std::collections::HashMap;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct SchemaInferenceConfig {
7 pub ignore_outer_array: bool,
9 pub delimiter: Option<u8>,
11 pub schema_uri: Option<String>,
13 pub map_threshold: usize,
15 pub map_max_required_keys: Option<usize>,
18 pub unify_maps: bool,
20 pub no_unify: std::collections::HashSet<String>,
22 pub force_field_types: HashMap<String, String>,
24 pub force_parent_field_types: HashMap<String, String>,
28 pub force_scalar_promotion: std::collections::HashSet<String>,
32 pub wrap_scalars: bool,
36 pub wrap_root: Option<String>,
40 pub no_root_map: bool,
42 pub max_builders: Option<usize>,
46 #[cfg(feature = "avro")]
48 pub avro: bool,
49 pub debug: bool,
52 pub profile: bool,
54 pub verbosity: DebugVerbosity,
56}
57
58#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
59pub enum DebugVerbosity {
60 #[default]
62 Normal,
63 Verbose,
65}
66
67impl SchemaInferenceConfig {
68 pub(crate) fn profile(&self, args: std::fmt::Arguments) {
69 if self.profile {
70 let message = format!("{}", args);
71 anstream::eprintln!("{}", message);
72 }
73 }
74
75 pub(crate) fn profile_verbose(&self, args: std::fmt::Arguments) {
76 if self.profile && matches!(self.verbosity, DebugVerbosity::Verbose) {
77 let message = format!("{}", args);
78 anstream::eprintln!("{}", message);
79 }
80 }
81
82 pub(crate) fn debug(&self, args: std::fmt::Arguments) {
83 if self.debug {
84 let message = format!("{}", args);
85 anstream::eprintln!("{}", self.maybe_truncate(message));
86 }
87 }
88
89 pub(crate) fn debug_verbose(&self, args: std::fmt::Arguments) {
90 if self.debug && matches!(self.verbosity, DebugVerbosity::Verbose) {
91 let message = format!("{}", args);
92 anstream::eprintln!("{}", self.maybe_truncate(message));
93 }
94 }
95
96 fn maybe_truncate(&self, message: String) -> String {
97 let lines: Vec<&str> = message.lines().collect();
98
99 if lines.len() > 20 && self.verbosity == DebugVerbosity::Normal {
100 let mut truncated = String::new();
101
102 for line in lines.iter().take(10) {
104 truncated.push_str(line);
105 truncated.push('\n');
106 }
107
108 truncated.push_str(&format!("... ({} lines truncated) ...\n", lines.len() - 15));
109
110 for line in lines.iter().skip(lines.len() - 5) {
112 truncated.push_str(line);
113 truncated.push('\n');
114 }
115
116 truncated
117 } else {
118 message
119 }
120 }
121}
122
123impl Default for SchemaInferenceConfig {
124 fn default() -> Self {
125 Self {
126 ignore_outer_array: true,
127 delimiter: None,
128 schema_uri: Some("AUTO".to_string()),
129 map_threshold: 20,
130 map_max_required_keys: None,
131 unify_maps: false,
132 no_unify: std::collections::HashSet::new(),
133 force_field_types: std::collections::HashMap::new(),
134 force_parent_field_types: std::collections::HashMap::new(),
135 force_scalar_promotion: std::collections::HashSet::new(),
136 wrap_scalars: true,
137 wrap_root: None,
138 no_root_map: true,
139 max_builders: None,
140 #[cfg(feature = "avro")]
141 avro: false,
142 debug: false,
143 profile: false,
144 verbosity: DebugVerbosity::default(),
145 }
146 }
147}
148
/// Formats a message and passes it to `$cfg.profile(..)`.
///
/// The first argument is the expression to call `profile` on; everything
/// after the comma is handed to `format_args!` unchanged, so it follows the
/// usual `format!` syntax.
#[macro_export]
macro_rules! profile {
    ($cfg:expr, $($arg:tt)*) => {
        $cfg.profile(format_args!($($arg)*))
    };
}
155
/// Formats a message and passes it to `$cfg.profile_verbose(..)`.
///
/// The first argument is the expression to call `profile_verbose` on;
/// everything after the comma is handed to `format_args!` unchanged.
#[macro_export]
macro_rules! profile_verbose {
    ($cfg:expr, $($arg:tt)*) => {
        $cfg.profile_verbose(format_args!($($arg)*))
    };
}
162
/// Formats a message and passes it to `$cfg.debug(..)`.
///
/// The first argument is the expression to call `debug` on; everything after
/// the comma is handed to `format_args!` unchanged.
#[macro_export]
macro_rules! debug {
    ($cfg:expr, $($arg:tt)*) => {
        $cfg.debug(format_args!($($arg)*))
    };
}
169
/// Formats a message and passes it to `$cfg.debug_verbose(..)`.
///
/// The first argument is the expression to call `debug_verbose` on;
/// everything after the comma is handed to `format_args!` unchanged.
#[macro_export]
macro_rules! debug_verbose {
    ($cfg:expr, $($arg:tt)*) => {
        $cfg.debug_verbose(format_args!($($arg)*))
    };
}
176
177#[derive(Debug, Clone, Serialize, Deserialize)]
178pub struct SchemaInferenceResult {
179 pub schema: Value,
180 pub processed_count: usize,
181}
182
#[cfg(feature = "avro")]
impl SchemaInferenceResult {
    /// Converts `self.schema` with `avrotize::converter::jsons_to_avro` and
    /// returns whatever that call produces.
    ///
    /// Only compiled when the `avro` feature is enabled.
    ///
    /// * `namespace` - forwarded unchanged.
    /// * `utility_namespace` - forwarded; `None` becomes the empty string.
    /// * `base_uri` - forwarded; `None` becomes `"genson-core"`.
    /// * `split_top_level` - forwarded unchanged.
    pub fn to_avro_schema(
        &self,
        namespace: &str,
        utility_namespace: Option<&str>,
        base_uri: Option<&str>,
        split_top_level: bool,
    ) -> Value {
        avrotize::converter::jsons_to_avro(
            &self.schema,
            namespace,
            utility_namespace.unwrap_or(""),
            base_uri.unwrap_or("genson-core"),
            split_top_level,
        )
    }
}
201
/// Builds the key for a promoted scalar by joining `field_prefix` and
/// `scalar_type` with a double underscore.
///
/// For example, `("value", "string")` yields `"value__string"`. Neither input
/// is validated or escaped; empty strings are joined as-is.
pub fn make_promoted_scalar_key(field_prefix: &str, scalar_type: &str) -> String {
    format!("{}__{}", field_prefix, scalar_type)
}