base_d/encoders/algorithms/schema/
stele_analyzer.rs1use serde_json::Value;
2
/// Encoding mode selected by [`detect_stele_mode`] for a JSON document.
///
/// `PartialEq`/`Eq` are derived so callers can compare modes with `==` and
/// `assert_eq!` instead of pattern matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DetectedMode {
    /// Chosen when none of the `Path` heuristics fire, and also when the input
    /// is not valid JSON.
    Full,
    /// Chosen when the document has many unique paths, is deeply nested with
    /// arrays of objects, or contains arrays whose elements differ in shape.
    Path,
}
9
10pub fn detect_stele_mode(json: &str) -> DetectedMode {
12 let value: Value = match serde_json::from_str(json) {
13 Ok(v) => v,
14 Err(_) => return DetectedMode::Full, };
16
17 let analysis = analyze_structure(&value, 0);
18
19 if analysis.unique_paths > 50 {
28 return DetectedMode::Path;
29 }
30
31 if analysis.max_depth > 3 && analysis.has_indexed_arrays {
33 return DetectedMode::Path;
34 }
35
36 if analysis.has_varying_array_structure {
38 return DetectedMode::Path;
39 }
40
41 if is_homogeneous_array(&value) {
43 return DetectedMode::Full;
44 }
45
46 DetectedMode::Full
48}
49
/// Structural summary of a JSON value, produced by `analyze_structure`.
///
/// `Default` gives the "nothing seen yet" state: zero depth, zero paths and
/// both flags cleared.
#[derive(Debug, Default)]
struct StructureAnalysis {
    /// Deepest nesting level reached, counted from the depth passed for the root.
    max_depth: usize,
    /// Heuristic count of distinct paths in the value.
    unique_paths: usize,
    /// Set when some non-empty array has an object as its first element.
    has_indexed_arrays: bool,
    /// Set when some array holds elements whose type or key list differs from
    /// its first element.
    has_varying_array_structure: bool,
}
57
58fn analyze_structure(value: &Value, depth: usize) -> StructureAnalysis {
59 let mut analysis = StructureAnalysis {
60 max_depth: depth,
61 ..Default::default()
62 };
63
64 match value {
65 Value::Object(map) => {
66 let mut paths = 0;
67 for (_, v) in map {
68 let child = analyze_structure(v, depth + 1);
69 analysis.max_depth = analysis.max_depth.max(child.max_depth);
70 paths += child.unique_paths.max(1);
71 analysis.has_indexed_arrays |= child.has_indexed_arrays;
72 analysis.has_varying_array_structure |= child.has_varying_array_structure;
73 }
74 analysis.unique_paths = paths;
75 }
76 Value::Array(arr) => {
77 if arr.is_empty() {
78 analysis.unique_paths = 1;
79 return analysis;
80 }
81
82 let first_type = type_signature(&arr[0]);
84 let mut max_child_analysis = StructureAnalysis::default();
85
86 if matches!(arr[0], Value::Object(_)) {
88 let first_keys = if let Value::Object(map) = &arr[0] {
89 map.keys().collect::<Vec<_>>()
90 } else {
91 vec![]
92 };
93
94 for item in arr.iter().skip(1) {
95 if type_signature(item) != first_type {
96 analysis.has_varying_array_structure = true;
97 }
98
99 if let Value::Object(map) = item {
101 let keys = map.keys().collect::<Vec<_>>();
102 if keys != first_keys {
103 analysis.has_varying_array_structure = true;
104 }
105 }
106 }
107 } else {
108 for item in arr.iter().skip(1) {
110 if type_signature(item) != first_type {
111 analysis.has_varying_array_structure = true;
112 }
113 }
114 }
115
116 for item in arr {
118 let child = analyze_structure(item, depth + 1);
119 max_child_analysis.max_depth = max_child_analysis.max_depth.max(child.max_depth);
120 max_child_analysis.unique_paths =
121 max_child_analysis.unique_paths.max(child.unique_paths);
122 max_child_analysis.has_indexed_arrays |= child.has_indexed_arrays;
123 max_child_analysis.has_varying_array_structure |= child.has_varying_array_structure;
124 }
125
126 if matches!(arr[0], Value::Object(_)) {
128 analysis.has_indexed_arrays = true;
129 }
130
131 if matches!(arr[0], Value::Object(_)) {
133 analysis.unique_paths = max_child_analysis.unique_paths * arr.len();
134 } else {
135 analysis.unique_paths = 1;
136 }
137
138 analysis.max_depth = max_child_analysis.max_depth;
139 analysis.has_indexed_arrays |= max_child_analysis.has_indexed_arrays;
140 analysis.has_varying_array_structure |= max_child_analysis.has_varying_array_structure;
141 }
142 _ => {
143 analysis.unique_paths = 1;
144 }
145 }
146
147 analysis
148}
149
150fn type_signature(value: &Value) -> &str {
151 match value {
152 Value::Null => "null",
153 Value::Bool(_) => "bool",
154 Value::Number(_) => "number",
155 Value::String(_) => "string",
156 Value::Array(_) => "array",
157 Value::Object(_) => "object",
158 }
159}
160
161fn is_homogeneous_array(value: &Value) -> bool {
162 match value {
163 Value::Array(arr) => {
164 if arr.is_empty() {
165 return false;
166 }
167
168 let first = match &arr[0] {
170 Value::Object(map) => map,
171 _ => return false,
172 };
173
174 let first_keys: Vec<_> = first.keys().collect();
175
176 for item in arr.iter().skip(1) {
177 match item {
178 Value::Object(map) => {
179 let keys: Vec<_> = map.keys().collect();
180 if keys.len() != first_keys.len() {
181 return false;
182 }
183 for key in &first_keys {
184 if !keys.contains(key) {
185 return false;
186 }
187 }
188 }
189 _ => return false,
190 }
191 }
192 true
193 }
194 Value::Object(map) => {
195 if map.len() == 1 {
197 for (key, value) in map {
198 if matches!(
199 key.as_str(),
200 "results" | "data" | "items" | "records" | "rows"
201 ) {
202 return is_homogeneous_array(value);
203 }
204 }
205 }
206 false
207 }
208 _ => false,
209 }
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `json` is detected as `DetectedMode::Full`.
    #[track_caller]
    fn assert_full(json: &str) {
        let mode = detect_stele_mode(json);
        assert!(
            matches!(mode, DetectedMode::Full),
            "expected Full for {}, got {:?}",
            json,
            mode
        );
    }

    /// Asserts that `json` is detected as `DetectedMode::Path`.
    #[track_caller]
    fn assert_path(json: &str) {
        let mode = detect_stele_mode(json);
        assert!(
            matches!(mode, DetectedMode::Path),
            "expected Path for {}, got {:?}",
            json,
            mode
        );
    }

    /// Builds a flat JSON object with `n` scalar members (`k0`, `k1`, ...).
    fn flat_object(n: usize) -> String {
        let members: Vec<String> = (0..n).map(|i| format!("\"k{}\":{}", i, i)).collect();
        format!("{{{}}}", members.join(","))
    }

    #[test]
    fn test_detect_homogeneous_array() {
        assert_full(r#"[{"name":"alice"},{"name":"bob"}]"#);
    }

    #[test]
    fn test_detect_deep_nested() {
        assert_path(r#"{"a":{"b":{"c":{"d":{"e":[{"f":1},{"f":2}]}}}}}"#);
    }

    #[test]
    fn test_detect_varying_structure() {
        assert_path(r#"{"items":[{"type":"a","x":1},{"type":"b","y":2}]}"#);
    }

    #[test]
    fn test_detect_simple_object() {
        assert_full(r#"{"id":1,"name":"alice"}"#);
    }

    #[test]
    fn test_detect_wrapper_key() {
        assert_full(r#"{"results":[{"id":1},{"id":2}]}"#);
    }

    #[test]
    fn test_detect_invalid_json_falls_back_to_full() {
        // Unparseable input must not panic; it falls back to Full.
        assert_full("this is not json");
        assert_full("");
    }

    #[test]
    fn test_detect_unique_path_threshold() {
        // Exactly 50 scalar members stays at the threshold; 51 exceeds it.
        assert_full(&flat_object(50));
        assert_path(&flat_object(51));
    }
}