base_d/encoders/algorithms/schema/
fiche_analyzer.rs1use serde_json::Value;
2
/// Encoding mode chosen for a JSON document by [`detect_fiche_mode`].
///
/// The variants carry no data. `PartialEq`/`Eq` are derived so callers can
/// compare modes directly (`mode == DetectedMode::Full`) instead of going
/// through `matches!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DetectedMode {
    /// Returned for input that fails to parse, for homogeneous object arrays,
    /// and for documents that trip none of the path heuristics.
    Full,
    /// Returned when an array varies in structure, when the document is deep
    /// and contains object arrays, or when the estimated path count is large.
    Path,
}
9
10pub fn detect_fiche_mode(json: &str) -> DetectedMode {
12 let value: Value = match serde_json::from_str(json) {
13 Ok(v) => v,
14 Err(_) => return DetectedMode::Full, };
16
17 let analysis = analyze_structure(&value, 0);
18
19 if is_homogeneous_array(&value) {
27 return DetectedMode::Full;
28 }
29
30 if analysis.has_varying_array_structure {
32 return DetectedMode::Path;
33 }
34
35 if analysis.max_depth > 3 && analysis.has_indexed_arrays {
37 return DetectedMode::Path;
38 }
39
40 if analysis.unique_paths > 50 {
42 return DetectedMode::Path;
43 }
44
45 DetectedMode::Full
47}
48
/// Summary of a JSON value's shape, as produced by `analyze_structure`.
///
/// `Default` yields depth 0, zero paths and both flags cleared.
#[derive(Debug, Default)]
struct StructureAnalysis {
    /// Deepest nesting level reached; each object member or array element
    /// sits one level below its container.
    max_depth: usize,
    /// Estimated number of paths in the value. Scalars and empty arrays
    /// count as 1, objects sum their members, and arrays of objects multiply
    /// the largest element count by the array length.
    unique_paths: usize,
    /// Set when the value contains a non-empty array whose first element is
    /// an object.
    has_indexed_arrays: bool,
    /// Set when some array holds an element whose JSON type, or (for objects)
    /// key list, differs from that array's first element.
    has_varying_array_structure: bool,
}
56
57fn analyze_structure(value: &Value, depth: usize) -> StructureAnalysis {
58 let mut analysis = StructureAnalysis {
59 max_depth: depth,
60 ..Default::default()
61 };
62
63 match value {
64 Value::Object(map) => {
65 let mut paths = 0;
66 for (_, v) in map {
67 let child = analyze_structure(v, depth + 1);
68 analysis.max_depth = analysis.max_depth.max(child.max_depth);
69 paths += child.unique_paths.max(1);
70 analysis.has_indexed_arrays |= child.has_indexed_arrays;
71 analysis.has_varying_array_structure |= child.has_varying_array_structure;
72 }
73 analysis.unique_paths = paths;
74 }
75 Value::Array(arr) => {
76 if arr.is_empty() {
77 analysis.unique_paths = 1;
78 return analysis;
79 }
80
81 let first_type = type_signature(&arr[0]);
83 let mut max_child_analysis = StructureAnalysis::default();
84
85 if matches!(arr[0], Value::Object(_)) {
87 let first_keys = if let Value::Object(map) = &arr[0] {
88 map.keys().collect::<Vec<_>>()
89 } else {
90 vec![]
91 };
92
93 for item in arr.iter().skip(1) {
94 if type_signature(item) != first_type {
95 analysis.has_varying_array_structure = true;
96 }
97
98 if let Value::Object(map) = item {
100 let keys = map.keys().collect::<Vec<_>>();
101 if keys != first_keys {
102 analysis.has_varying_array_structure = true;
103 }
104 }
105 }
106 } else {
107 for item in arr.iter().skip(1) {
109 if type_signature(item) != first_type {
110 analysis.has_varying_array_structure = true;
111 }
112 }
113 }
114
115 for item in arr {
117 let child = analyze_structure(item, depth + 1);
118 max_child_analysis.max_depth = max_child_analysis.max_depth.max(child.max_depth);
119 max_child_analysis.unique_paths =
120 max_child_analysis.unique_paths.max(child.unique_paths);
121 max_child_analysis.has_indexed_arrays |= child.has_indexed_arrays;
122 max_child_analysis.has_varying_array_structure |= child.has_varying_array_structure;
123 }
124
125 if matches!(arr[0], Value::Object(_)) {
127 analysis.has_indexed_arrays = true;
128 }
129
130 if matches!(arr[0], Value::Object(_)) {
132 analysis.unique_paths = max_child_analysis.unique_paths * arr.len();
133 } else {
134 analysis.unique_paths = 1;
135 }
136
137 analysis.max_depth = max_child_analysis.max_depth;
138 analysis.has_indexed_arrays |= max_child_analysis.has_indexed_arrays;
139 analysis.has_varying_array_structure |= max_child_analysis.has_varying_array_structure;
140 }
141 _ => {
142 analysis.unique_paths = 1;
143 }
144 }
145
146 analysis
147}
148
149fn type_signature(value: &Value) -> &str {
150 match value {
151 Value::Null => "null",
152 Value::Bool(_) => "bool",
153 Value::Number(_) => "number",
154 Value::String(_) => "string",
155 Value::Array(_) => "array",
156 Value::Object(_) => "object",
157 }
158}
159
160fn is_homogeneous_array(value: &Value) -> bool {
161 match value {
162 Value::Array(arr) => {
163 if arr.is_empty() {
164 return false;
165 }
166
167 let first = match &arr[0] {
169 Value::Object(map) => map,
170 _ => return false,
171 };
172
173 let first_keys: Vec<_> = first.keys().collect();
174
175 for item in arr.iter().skip(1) {
176 match item {
177 Value::Object(map) => {
178 let keys: Vec<_> = map.keys().collect();
179 if keys.len() != first_keys.len() {
180 return false;
181 }
182 for key in &first_keys {
183 if !keys.contains(key) {
184 return false;
185 }
186 }
187 }
188 _ => return false,
189 }
190 }
191 true
192 }
193 Value::Object(map) => {
194 if map.len() == 1 {
196 for (key, value) in map {
197 if matches!(
198 key.as_str(),
199 "results" | "data" | "items" | "records" | "rows"
200 ) {
201 return is_homogeneous_array(value);
202 }
203 }
204 }
205 false
206 }
207 _ => false,
208 }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a flat JSON object with `count` numeric members `k0..k{count-1}`.
    fn flat_object(count: usize) -> String {
        let members: Vec<String> = (0..count).map(|i| format!("\"k{}\":{}", i, i)).collect();
        format!("{{{}}}", members.join(","))
    }

    #[test]
    fn test_detect_homogeneous_array() {
        let json = r#"[{"name":"alice"},{"name":"bob"}]"#;
        let mode = detect_fiche_mode(json);
        assert!(matches!(mode, DetectedMode::Full));
    }

    #[test]
    fn test_detect_deep_nested() {
        let json = r#"{"a":{"b":{"c":{"d":{"e":[{"f":1},{"f":2}]}}}}}"#;
        let mode = detect_fiche_mode(json);
        assert!(matches!(mode, DetectedMode::Path));
    }

    #[test]
    fn test_detect_varying_structure() {
        let json = r#"{"items":[{"type":"a","x":1},{"type":"b","y":2}]}"#;
        let mode = detect_fiche_mode(json);
        assert!(matches!(mode, DetectedMode::Path));
    }

    #[test]
    fn test_detect_simple_object() {
        let json = r#"{"id":1,"name":"alice"}"#;
        let mode = detect_fiche_mode(json);
        assert!(matches!(mode, DetectedMode::Full));
    }

    #[test]
    fn test_detect_wrapper_key() {
        let json = r#"{"results":[{"id":1},{"id":2}]}"#;
        let mode = detect_fiche_mode(json);
        assert!(matches!(mode, DetectedMode::Full));
    }

    #[test]
    fn test_detect_invalid_json_falls_back_to_full() {
        // Parse failures are not reported; they select the default mode.
        let mode = detect_fiche_mode("{not valid json");
        assert!(matches!(mode, DetectedMode::Full));
    }

    #[test]
    fn test_detect_empty_array() {
        // An empty array is neither homogeneous nor varying, and counts as
        // a single path.
        let mode = detect_fiche_mode("[]");
        assert!(matches!(mode, DetectedMode::Full));
    }

    #[test]
    fn test_detect_many_paths() {
        // 51 members exceed the path threshold of 50.
        let mode = detect_fiche_mode(&flat_object(51));
        assert!(matches!(mode, DetectedMode::Path));
    }

    #[test]
    fn test_detect_path_threshold_is_exclusive() {
        // Exactly 50 members do not exceed the threshold.
        let mode = detect_fiche_mode(&flat_object(50));
        assert!(matches!(mode, DetectedMode::Full));
    }
}