1use crate::DEFAULT_SCHEMA;
21use hedl_core::convert::parse_reference;
22use hedl_core::lex::Tensor;
23use hedl_core::lex::{parse_expression_token, singularize_and_capitalize};
24use hedl_core::{Document, Item, MatrixList, Node, Value};
25use serde_json::{Map, Value as JsonValue};
26use smallvec::SmallVec;
27use std::collections::{BTreeMap, HashMap};
28
/// Nesting-depth limit used by [`FromJsonConfig::default`].
pub const DEFAULT_MAX_DEPTH: usize = 10_000;

/// Per-array element-count limit used by [`FromJsonConfig::default`].
pub const DEFAULT_MAX_ARRAY_SIZE: usize = 10_000_000;

/// Per-string length limit used by [`FromJsonConfig::default`].
///
/// The conversion code compares this against `str::len`, i.e. a byte count.
pub const DEFAULT_MAX_STRING_LENGTH: usize = 100 * 1024 * 1024;

/// Per-object key-count limit used by [`FromJsonConfig::default`].
pub const DEFAULT_MAX_OBJECT_SIZE: usize = 100_000;
53
/// How unpaired UTF-16 surrogate escapes (`\uD800`-`\uDFFF`) in JSON text are
/// treated before the text is handed to the JSON parser.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum SurrogatePolicy {
    /// Leave the input untouched; any problem is reported by the parser.
    /// This is the default.
    #[default]
    Reject,

    /// Rewrite each unpaired surrogate escape to `\uFFFD`.
    ReplaceWithFFFD,

    /// Remove each unpaired surrogate escape from the input.
    Skip,
}
104
105#[derive(Debug, Clone, thiserror::Error)]
107pub enum JsonConversionError {
108 #[error("JSON parse error: {0}")]
110 ParseError(String),
111
112 #[error("Root must be a JSON object, found {0}")]
114 InvalidRoot(String),
115
116 #[error("Invalid number: {0}")]
118 InvalidNumber(String),
119
120 #[error("Invalid expression: {0}")]
122 InvalidExpression(String),
123
124 #[error("Invalid tensor element - must be number or array")]
126 InvalidTensor,
127
128 #[error("Nested objects not allowed in scalar context")]
130 NestedObject,
131
132 #[error("Invalid reference: {0}")]
134 InvalidReference(String),
135
136 #[error("Invalid Unicode: {0}")]
159 InvalidUnicode(String),
160
161 #[error("Maximum recursion depth ({0}) exceeded - possible deeply nested structure")]
163 MaxDepthExceeded(usize),
164
165 #[error("Maximum array size ({0}) exceeded - array has {1} elements")]
167 MaxArraySizeExceeded(usize, usize),
168
169 #[error("Maximum string length ({0}) exceeded - string has {1} characters")]
171 MaxStringLengthExceeded(usize, usize),
172
173 #[error("Maximum object size ({0}) exceeded - object has {1} keys")]
175 MaxObjectSizeExceeded(usize, usize),
176
177 #[error(
219 "Integer overflow: {value} exceeds i64 range [{min}..{max}]. \
220 Consider using a string for large IDs or timestamps."
221 )]
222 IntegerOverflow {
223 value: String,
225 max: i64,
227 min: i64,
229 },
230}
231
232impl From<serde_json::Error> for JsonConversionError {
233 fn from(err: serde_json::Error) -> Self {
234 let msg = err.to_string();
235
236 if msg.contains("lone surrogate")
238 || msg.contains("surrogate")
239 || msg.contains("invalid unicode")
240 {
241 JsonConversionError::InvalidUnicode(format!(
242 "Invalid UTF-16 surrogate sequence: {msg}. \
243 JSON contains unpaired surrogates which cannot be represented \
244 in Rust UTF-8 strings. Configure SurrogatePolicy::ReplaceWithFFFD \
245 to replace with the Unicode replacement character (U+FFFD)."
246 ))
247 } else if msg.contains("control character") {
248 JsonConversionError::InvalidUnicode(format!(
249 "Unescaped control character in JSON string: {msg}. \
250 Control characters (U+0000-U+001F) must be escaped as \\uXXXX \
251 per RFC 8259."
252 ))
253 } else {
254 JsonConversionError::ParseError(msg)
255 }
256 }
257}
258
259#[inline]
276fn is_integer_overflow(n: &serde_json::Number) -> bool {
277 n.as_i64().is_none() && (n.as_u64().is_some() || n.is_i64())
280}
281
282#[inline]
300fn json_number_to_value(n: &serde_json::Number) -> Result<Value, JsonConversionError> {
301 if let Some(i) = n.as_i64() {
303 return Ok(Value::Int(i));
304 }
305
306 if is_integer_overflow(n) {
308 return Err(JsonConversionError::IntegerOverflow {
309 value: n.to_string(),
310 max: i64::MAX,
311 min: i64::MIN,
312 });
313 }
314
315 if let Some(f) = n.as_f64() {
317 Ok(Value::Float(f))
318 } else {
319 Err(JsonConversionError::InvalidNumber(n.to_string()))
321 }
322}
323
324#[derive(Debug, Clone)]
371pub struct FromJsonConfig {
372 pub default_type_name: String,
374
375 pub version: (u32, u32),
377
378 pub max_depth: Option<usize>,
383
384 pub max_array_size: Option<usize>,
390
391 pub max_string_length: Option<usize>,
398
399 pub max_object_size: Option<usize>,
405
406 pub surrogate_policy: SurrogatePolicy,
413
414 #[cfg(feature = "lenient")]
424 pub lenient: bool,
425}
426
427impl Default for FromJsonConfig {
428 fn default() -> Self {
429 Self {
430 default_type_name: "Item".to_string(),
431 version: (1, 0),
432 max_depth: Some(DEFAULT_MAX_DEPTH),
433 max_array_size: Some(DEFAULT_MAX_ARRAY_SIZE),
434 max_string_length: Some(DEFAULT_MAX_STRING_LENGTH),
435 max_object_size: Some(DEFAULT_MAX_OBJECT_SIZE),
436 surrogate_policy: SurrogatePolicy::default(),
437 #[cfg(feature = "lenient")]
438 lenient: false,
439 }
440 }
441}
442
443impl FromJsonConfig {
444 #[must_use]
457 pub fn builder() -> FromJsonConfigBuilder {
458 FromJsonConfigBuilder::default()
459 }
460}
461
462impl hedl_core::convert::ImportConfig for FromJsonConfig {
463 fn default_type_name(&self) -> &str {
464 &self.default_type_name
465 }
466
467 fn version(&self) -> (u32, u32) {
468 self.version
469 }
470}
471
472#[derive(Debug, Clone)]
502pub struct FromJsonConfigBuilder {
503 default_type_name: String,
504 version: (u32, u32),
505 max_depth: Option<usize>,
506 max_array_size: Option<usize>,
507 max_string_length: Option<usize>,
508 max_object_size: Option<usize>,
509 surrogate_policy: SurrogatePolicy,
510 #[cfg(feature = "lenient")]
511 lenient: bool,
512}
513
514impl Default for FromJsonConfigBuilder {
515 fn default() -> Self {
516 Self {
517 default_type_name: "Item".to_string(),
518 version: (1, 0),
519 max_depth: Some(DEFAULT_MAX_DEPTH),
520 max_array_size: Some(DEFAULT_MAX_ARRAY_SIZE),
521 max_string_length: Some(DEFAULT_MAX_STRING_LENGTH),
522 max_object_size: Some(DEFAULT_MAX_OBJECT_SIZE),
523 surrogate_policy: SurrogatePolicy::default(),
524 #[cfg(feature = "lenient")]
525 lenient: false,
526 }
527 }
528}
529
530impl FromJsonConfigBuilder {
531 pub fn default_type_name(mut self, name: impl Into<String>) -> Self {
533 self.default_type_name = name.into();
534 self
535 }
536
537 #[must_use]
539 pub fn version(mut self, major: u32, minor: u32) -> Self {
540 self.version = (major, minor);
541 self
542 }
543
544 #[must_use]
548 pub fn max_depth(mut self, limit: usize) -> Self {
549 self.max_depth = Some(limit);
550 self
551 }
552
553 #[must_use]
557 pub fn max_array_size(mut self, limit: usize) -> Self {
558 self.max_array_size = Some(limit);
559 self
560 }
561
562 #[must_use]
566 pub fn max_string_length(mut self, limit: usize) -> Self {
567 self.max_string_length = Some(limit);
568 self
569 }
570
571 #[must_use]
575 pub fn max_object_size(mut self, limit: usize) -> Self {
576 self.max_object_size = Some(limit);
577 self
578 }
579
580 #[must_use]
598 pub fn surrogate_policy(mut self, policy: SurrogatePolicy) -> Self {
599 self.surrogate_policy = policy;
600 self
601 }
602
603 #[must_use]
608 pub fn unlimited(mut self) -> Self {
609 self.max_depth = None;
610 self.max_array_size = None;
611 self.max_string_length = None;
612 self.max_object_size = None;
613 self
614 }
615
616 #[cfg(feature = "lenient")]
637 #[must_use]
638 pub fn lenient(mut self, lenient: bool) -> Self {
639 self.lenient = lenient;
640 self
641 }
642
643 #[must_use]
645 pub fn build(self) -> FromJsonConfig {
646 FromJsonConfig {
647 default_type_name: self.default_type_name,
648 version: self.version,
649 max_depth: self.max_depth,
650 max_array_size: self.max_array_size,
651 max_string_length: self.max_string_length,
652 max_object_size: self.max_object_size,
653 surrogate_policy: self.surrogate_policy,
654 #[cfg(feature = "lenient")]
655 lenient: self.lenient,
656 }
657 }
658}
659
/// Memo of inferred list schemas: maps a sorted list of object keys to the
/// column order chosen for it (see `json_array_to_matrix_list`).
type SchemaCache = HashMap<Vec<String>, Vec<String>>;
672
673fn preprocess_json_for_surrogates(
687 json: &str,
688 policy: SurrogatePolicy,
689) -> Result<String, JsonConversionError> {
690 if policy == SurrogatePolicy::Reject {
691 return Ok(json.to_string());
693 }
694
695 let bytes = json.as_bytes();
696 let mut result = String::with_capacity(json.len());
697 let mut i = 0;
698
699 while i < bytes.len() {
700 if i + 5 < bytes.len() && bytes[i] == b'\\' && bytes[i + 1] == b'u' {
702 if let Some(code) = parse_unicode_escape(&bytes[i + 2..i + 6]) {
704 let is_high_surrogate = (0xD800..=0xDBFF).contains(&code);
705 let is_low_surrogate = (0xDC00..=0xDFFF).contains(&code);
706
707 if is_high_surrogate {
708 let has_low_pair = i + 11 < bytes.len()
710 && bytes[i + 6] == b'\\'
711 && bytes[i + 7] == b'u'
712 && parse_unicode_escape(&bytes[i + 8..i + 12])
713 .is_some_and(|low| (0xDC00..=0xDFFF).contains(&low));
714
715 if has_low_pair {
716 result.push_str(&json[i..i + 12]);
718 i += 12;
719 continue;
720 }
721 match policy {
723 SurrogatePolicy::ReplaceWithFFFD => {
724 result.push_str("\\uFFFD");
725 }
726 SurrogatePolicy::Skip => {
727 }
729 SurrogatePolicy::Reject => unreachable!(),
730 }
731 i += 6;
732 continue;
733 } else if is_low_surrogate {
734 match policy {
736 SurrogatePolicy::ReplaceWithFFFD => {
737 result.push_str("\\uFFFD");
738 }
739 SurrogatePolicy::Skip => {
740 }
742 SurrogatePolicy::Reject => unreachable!(),
743 }
744 i += 6;
745 continue;
746 }
747 }
748 }
749
750 result.push(json[i..].chars().next().unwrap());
752 i += json[i..].chars().next().unwrap().len_utf8();
753 }
754
755 Ok(result)
756}
757
/// Parses the first four bytes of `bytes` as hexadecimal digits (either case)
/// and returns the resulting 16-bit value.
///
/// Returns `None` when fewer than four bytes are supplied or when any of the
/// first four is not an ASCII hex digit. Bytes past the fourth are ignored.
#[inline]
fn parse_unicode_escape(bytes: &[u8]) -> Option<u16> {
    let digits = bytes.get(..4)?;

    digits.iter().try_fold(0u16, |acc, &byte| {
        // `to_digit(16)` accepts exactly 0-9, a-f and A-F.
        let nibble = char::from(byte).to_digit(16)?;
        let nibble = u16::try_from(nibble).ok()?;
        Some(acc * 16 + nibble)
    })
}
777
778pub fn from_json(json: &str, config: &FromJsonConfig) -> Result<Document, JsonConversionError> {
800 let processed = preprocess_json_for_surrogates(json, config.surrogate_policy)?;
802 let json_to_parse = if config.surrogate_policy == SurrogatePolicy::Reject {
803 json
804 } else {
805 &processed
806 };
807
808 #[cfg(feature = "lenient")]
809 let value: JsonValue = if config.lenient {
810 serde_jsonrc::from_str(json_to_parse)
811 .map_err(|e| JsonConversionError::ParseError(e.to_string()))?
812 } else {
813 serde_json::from_str(json_to_parse)?
814 };
815
816 #[cfg(not(feature = "lenient"))]
817 let value: JsonValue = serde_json::from_str(json_to_parse)?;
818
819 from_json_value(&value, config)
820}
821
822pub fn from_json_value(
845 value: &JsonValue,
846 config: &FromJsonConfig,
847) -> Result<Document, JsonConversionError> {
848 let mut structs = BTreeMap::new();
849 let mut schema_cache = SchemaCache::new();
850 let root = match value {
851 JsonValue::Object(map) => {
852 json_object_to_root(map, config, &mut structs, &mut schema_cache, 0)?
853 }
854 _ => return Err(JsonConversionError::InvalidRoot(format!("{value:?}"))),
855 };
856
857 Ok(Document {
858 version: config.version,
859 schema_versions: BTreeMap::new(),
860 aliases: BTreeMap::new(),
861 structs,
862 nests: BTreeMap::new(),
863 root,
864 })
865}
866
867pub fn from_json_value_owned(
899 value: JsonValue,
900 config: &FromJsonConfig,
901) -> Result<Document, JsonConversionError> {
902 let mut structs = BTreeMap::new();
903 let mut schema_cache = SchemaCache::new();
904 let root = match value {
905 JsonValue::Object(map) => {
906 json_object_to_root_owned(map, config, &mut structs, &mut schema_cache, 0)?
907 }
908 _ => {
909 return Err(JsonConversionError::InvalidRoot(
910 "Root must be an object".to_string(),
911 ))
912 }
913 };
914
915 Ok(Document {
916 version: config.version,
917 schema_versions: BTreeMap::new(),
918 aliases: BTreeMap::new(),
919 structs,
920 nests: BTreeMap::new(),
921 root,
922 })
923}
924
925fn process_json_object_inner(
933 map: &Map<String, JsonValue>,
934 config: &FromJsonConfig,
935 structs: &mut BTreeMap<String, Vec<String>>,
936 schema_cache: &mut SchemaCache,
937 depth: usize,
938) -> Result<BTreeMap<String, Item>, JsonConversionError> {
939 if let Some(max_size) = config.max_object_size {
941 if map.len() > max_size {
942 return Err(JsonConversionError::MaxObjectSizeExceeded(
943 max_size,
944 map.len(),
945 ));
946 }
947 }
948
949 let mut result = BTreeMap::new();
952
953 if map.len() < 32 {
954 for (key, value) in map {
956 if key.starts_with("__") {
957 continue;
958 }
959 let item = json_value_to_item(value, key, config, structs, schema_cache, depth)?;
960 result.insert(key.clone(), item);
961 }
962 } else {
963 let mut items: Vec<(String, Item)> = Vec::with_capacity(map.len());
965
966 for (key, value) in map {
967 if key.starts_with("__") {
968 continue;
969 }
970 let item = json_value_to_item(value, key, config, structs, schema_cache, depth)?;
971 items.push((key.clone(), item));
972 }
973
974 items.sort_by(|a, b| a.0.cmp(&b.0));
976
977 for (key, item) in items {
979 result.insert(key, item);
980 }
981 }
982
983 Ok(result)
984}
985
986fn json_object_to_root(
987 map: &Map<String, JsonValue>,
988 config: &FromJsonConfig,
989 structs: &mut BTreeMap<String, Vec<String>>,
990 schema_cache: &mut SchemaCache,
991 depth: usize,
992) -> Result<BTreeMap<String, Item>, JsonConversionError> {
993 process_json_object_inner(map, config, structs, schema_cache, depth)
994}
995
996fn json_object_to_root_owned(
998 map: Map<String, JsonValue>,
999 config: &FromJsonConfig,
1000 structs: &mut BTreeMap<String, Vec<String>>,
1001 schema_cache: &mut SchemaCache,
1002 depth: usize,
1003) -> Result<BTreeMap<String, Item>, JsonConversionError> {
1004 if let Some(max_size) = config.max_object_size {
1006 if map.len() > max_size {
1007 return Err(JsonConversionError::MaxObjectSizeExceeded(
1008 max_size,
1009 map.len(),
1010 ));
1011 }
1012 }
1013
1014 let mut result = BTreeMap::new();
1015
1016 for (key, value) in map {
1017 if key.starts_with("__") {
1019 continue;
1020 }
1021
1022 let item = json_value_to_item_owned(value, &key, config, structs, schema_cache, depth)?;
1023 result.insert(key, item);
1024 }
1025
1026 Ok(result)
1027}
1028
1029fn json_object_to_item_map(
1030 map: &Map<String, JsonValue>,
1031 config: &FromJsonConfig,
1032 structs: &mut BTreeMap<String, Vec<String>>,
1033 schema_cache: &mut SchemaCache,
1034 depth: usize,
1035) -> Result<BTreeMap<String, Item>, JsonConversionError> {
1036 process_json_object_inner(map, config, structs, schema_cache, depth)
1037}
1038
1039fn json_value_to_item(
1040 value: &JsonValue,
1041 key: &str,
1042 config: &FromJsonConfig,
1043 structs: &mut BTreeMap<String, Vec<String>>,
1044 schema_cache: &mut SchemaCache,
1045 depth: usize,
1046) -> Result<Item, JsonConversionError> {
1047 if let Some(max_depth) = config.max_depth {
1049 if depth >= max_depth {
1050 return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1051 }
1052 }
1053
1054 match value {
1055 JsonValue::Null => Ok(Item::Scalar(Value::Null)),
1056 JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(*b))),
1057 JsonValue::Number(n) => {
1058 let value = json_number_to_value(n)?;
1059 Ok(Item::Scalar(value))
1060 }
1061 JsonValue::String(s) => {
1062 if let Some(max_len) = config.max_string_length {
1064 if s.len() > max_len {
1065 return Err(JsonConversionError::MaxStringLengthExceeded(
1066 max_len,
1067 s.len(),
1068 ));
1069 }
1070 }
1071
1072 if s.starts_with("$(") && s.ends_with(')') {
1074 let expr = parse_expression_token(s)
1075 .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
1076 Ok(Item::Scalar(Value::Expression(Box::new(expr))))
1077 } else {
1078 Ok(Item::Scalar(Value::String(s.clone().into_boxed_str())))
1083 }
1084 }
1085 JsonValue::Array(arr) => {
1086 if let Some(max_size) = config.max_array_size {
1088 if arr.len() > max_size {
1089 return Err(JsonConversionError::MaxArraySizeExceeded(
1090 max_size,
1091 arr.len(),
1092 ));
1093 }
1094 }
1095
1096 if arr.is_empty() {
1098 let type_name = singularize_and_capitalize(key);
1099 let schema: Vec<String> = DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect();
1100 let mut list = MatrixList::new(type_name.clone(), schema.clone());
1101 list.count_hint = Some(0);
1102 structs.insert(type_name, schema);
1103 Ok(Item::List(list))
1104 } else if is_tensor_array(arr) {
1105 let tensor = json_array_to_tensor(arr, config, depth + 1)?;
1107 Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
1108 } else if is_object_array(arr) {
1109 let list =
1111 json_array_to_matrix_list(arr, key, config, structs, schema_cache, depth + 1)?;
1112 Ok(Item::List(list))
1113 } else {
1114 let tensor = json_array_to_tensor(arr, config, depth + 1)?;
1116 Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
1117 }
1118 }
1119 JsonValue::Object(obj) => {
1120 if let Some(JsonValue::String(r)) = obj.get("@ref") {
1122 return Ok(Item::Scalar(Value::Reference(
1123 parse_reference(r).map_err(JsonConversionError::InvalidReference)?,
1124 )));
1125 }
1126 let item_map = json_object_to_item_map(obj, config, structs, schema_cache, depth + 1)?;
1128 Ok(Item::Object(item_map))
1129 }
1130 }
1131}
1132
1133fn json_value_to_item_owned(
1135 value: JsonValue,
1136 key: &str,
1137 config: &FromJsonConfig,
1138 structs: &mut BTreeMap<String, Vec<String>>,
1139 schema_cache: &mut SchemaCache,
1140 depth: usize,
1141) -> Result<Item, JsonConversionError> {
1142 if let Some(max_depth) = config.max_depth {
1144 if depth >= max_depth {
1145 return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1146 }
1147 }
1148
1149 match value {
1150 JsonValue::Null => Ok(Item::Scalar(Value::Null)),
1151 JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(b))),
1152 JsonValue::Number(n) => {
1153 let value = json_number_to_value(&n)?;
1154 Ok(Item::Scalar(value))
1155 }
1156 JsonValue::String(s) => {
1157 if let Some(max_len) = config.max_string_length {
1159 if s.len() > max_len {
1160 return Err(JsonConversionError::MaxStringLengthExceeded(
1161 max_len,
1162 s.len(),
1163 ));
1164 }
1165 }
1166
1167 if s.starts_with("$(") && s.ends_with(')') {
1169 let expr = parse_expression_token(&s)
1170 .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
1171 Ok(Item::Scalar(Value::Expression(Box::new(expr))))
1172 } else {
1173 Ok(Item::Scalar(Value::String(s.into_boxed_str())))
1175 }
1176 }
1177 JsonValue::Array(arr) => {
1178 if let Some(max_size) = config.max_array_size {
1180 if arr.len() > max_size {
1181 return Err(JsonConversionError::MaxArraySizeExceeded(
1182 max_size,
1183 arr.len(),
1184 ));
1185 }
1186 }
1187
1188 if arr.is_empty() {
1190 let type_name = singularize_and_capitalize(key);
1191 let schema: Vec<String> = DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect();
1192 let mut list = MatrixList::new(type_name.clone(), schema.clone());
1193 list.count_hint = Some(0);
1194 structs.insert(type_name, schema);
1195 Ok(Item::List(list))
1196 } else if is_tensor_array(&arr) {
1197 let tensor = json_array_to_tensor_owned(arr, config, depth + 1)?;
1199 Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
1200 } else if is_object_array(&arr) {
1201 let list =
1203 json_array_to_matrix_list(&arr, key, config, structs, schema_cache, depth + 1)?;
1204 Ok(Item::List(list))
1205 } else {
1206 let tensor = json_array_to_tensor_owned(arr, config, depth + 1)?;
1208 Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
1209 }
1210 }
1211 JsonValue::Object(obj) => {
1212 if let Some(JsonValue::String(r)) = obj.get("@ref") {
1214 return Ok(Item::Scalar(Value::Reference(
1215 parse_reference(r).map_err(JsonConversionError::InvalidReference)?,
1216 )));
1217 }
1218 let item_map = json_object_to_item_map(&obj, config, structs, schema_cache, depth + 1)?;
1220 Ok(Item::Object(item_map))
1221 }
1222 }
1223}
1224
/// Result of classifying the elements of a JSON array (see `classify_array`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ArrayType {
    /// The array has no elements.
    Empty,
    /// Every element is a number or an array.
    Tensor,
    /// Every element is an object.
    Objects,
    /// Anything else: other element types, or a mix of the above.
    Mixed,
}
1244
1245fn classify_array(arr: &[JsonValue]) -> ArrayType {
1249 if arr.is_empty() {
1250 return ArrayType::Empty;
1251 }
1252
1253 let first_type = match &arr[0] {
1255 JsonValue::Number(_) | JsonValue::Array(_) => ArrayType::Tensor,
1256 JsonValue::Object(_) => ArrayType::Objects,
1257 _ => return ArrayType::Mixed,
1258 };
1259
1260 for elem in &arr[1..] {
1262 let matches = match (first_type, elem) {
1263 (ArrayType::Tensor, JsonValue::Number(_)) => true,
1264 (ArrayType::Tensor, JsonValue::Array(_)) => true,
1265 (ArrayType::Objects, JsonValue::Object(_)) => true,
1266 _ => return ArrayType::Mixed,
1267 };
1268 if !matches {
1269 return ArrayType::Mixed;
1270 }
1271 }
1272
1273 first_type
1274}
1275
1276fn is_tensor_array(arr: &[JsonValue]) -> bool {
1278 matches!(classify_array(arr), ArrayType::Tensor)
1279}
1280
1281fn is_object_array(arr: &[JsonValue]) -> bool {
1282 matches!(classify_array(arr), ArrayType::Objects)
1283}
1284
1285fn json_array_to_tensor(
1286 arr: &[JsonValue],
1287 config: &FromJsonConfig,
1288 depth: usize,
1289) -> Result<Tensor, JsonConversionError> {
1290 if let Some(max_depth) = config.max_depth {
1292 if depth >= max_depth {
1293 return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1294 }
1295 }
1296
1297 let mut items = Vec::with_capacity(arr.len());
1300
1301 for v in arr {
1302 let tensor = match v {
1303 JsonValue::Number(n) => {
1304 if is_integer_overflow(n) {
1306 }
1309 n.as_f64()
1310 .map(Tensor::Scalar)
1311 .ok_or_else(|| JsonConversionError::InvalidNumber(n.to_string()))?
1312 }
1313 JsonValue::Array(nested) => json_array_to_tensor(nested, config, depth + 1)?,
1314 _ => return Err(JsonConversionError::InvalidTensor),
1315 };
1316 items.push(tensor);
1317 }
1318
1319 Ok(Tensor::Array(items))
1320}
1321
1322fn json_array_to_tensor_owned(
1324 arr: Vec<JsonValue>,
1325 config: &FromJsonConfig,
1326 depth: usize,
1327) -> Result<Tensor, JsonConversionError> {
1328 if let Some(max_depth) = config.max_depth {
1330 if depth >= max_depth {
1331 return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1332 }
1333 }
1334
1335 let mut items = Vec::with_capacity(arr.len());
1338
1339 for v in arr {
1340 let tensor = match v {
1341 JsonValue::Number(n) => {
1342 if is_integer_overflow(&n) {
1344 }
1347 n.as_f64()
1348 .map(Tensor::Scalar)
1349 .ok_or_else(|| JsonConversionError::InvalidNumber(n.to_string()))?
1350 }
1351 JsonValue::Array(nested) => json_array_to_tensor_owned(nested, config, depth + 1)?,
1352 _ => return Err(JsonConversionError::InvalidTensor),
1353 };
1354 items.push(tensor);
1355 }
1356
1357 Ok(Tensor::Array(items))
1358}
1359
1360#[allow(clippy::only_used_in_recursion)]
1361fn json_array_to_matrix_list(
1362 arr: &[JsonValue],
1363 key: &str,
1364 config: &FromJsonConfig,
1365 structs: &mut BTreeMap<String, Vec<String>>,
1366 schema_cache: &mut SchemaCache,
1367 depth: usize,
1368) -> Result<MatrixList, JsonConversionError> {
1369 if let Some(max_depth) = config.max_depth {
1371 if depth >= max_depth {
1372 return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1373 }
1374 }
1375 let type_name = singularize_and_capitalize(key);
1377
1378 let schema: Vec<String> = if let Some(JsonValue::Object(first)) = arr.first() {
1380 let inferred = if let Some(JsonValue::Array(schema_arr)) = first.get("__hedl_schema") {
1382 schema_arr
1383 .iter()
1384 .filter_map(|v| v.as_str().map(String::from))
1385 .collect()
1386 } else {
1387 let mut cache_key: SmallVec<[String; 16]> = SmallVec::with_capacity(first.len());
1391
1392 for k in first.keys() {
1393 if k.starts_with("__") {
1394 continue;
1395 }
1396 if let Some(JsonValue::Array(arr)) = first.get(k) {
1398 if is_object_array(arr) {
1399 continue;
1400 }
1401 }
1402 cache_key.push(k.clone());
1403 }
1404 cache_key.sort();
1405
1406 let cache_key_vec: Vec<String> = cache_key.iter().cloned().collect();
1408
1409 if let Some(cached_schema) = schema_cache.get(&cache_key_vec) {
1411 cached_schema.clone()
1412 } else {
1413 let mut keys = cache_key_vec.clone();
1415
1416 if let Some(pos) = keys.iter().position(|k| k == "id") {
1418 keys.remove(pos);
1419 keys.insert(0, "id".to_string());
1420 }
1421
1422 schema_cache.insert(cache_key_vec, keys.clone());
1424 keys
1425 }
1426 };
1427 if inferred.is_empty() {
1429 DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
1430 } else {
1431 inferred
1432 }
1433 } else {
1434 DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
1435 };
1436
1437 structs.insert(type_name.clone(), schema.clone());
1439
1440 let mut rows = Vec::with_capacity(arr.len());
1443
1444 for item in arr {
1445 if let JsonValue::Object(obj) = item {
1446 let id = obj
1448 .get(&schema[0])
1449 .and_then(|v| v.as_str())
1450 .unwrap_or("")
1451 .to_string();
1452
1453 let mut fields: SmallVec<[Value; 16]> = SmallVec::with_capacity(schema.len());
1456 for col in &schema {
1457 let value = obj
1458 .get(col)
1459 .map(|v| json_to_value(v, config))
1460 .transpose()?
1461 .unwrap_or(Value::Null);
1462 fields.push(value);
1463 }
1464
1465 let fields_vec: Vec<Value> = fields.into_vec();
1467
1468 let mut children: BTreeMap<String, Vec<Node>> = BTreeMap::new();
1471
1472 let child_count = obj
1474 .iter()
1475 .filter(|(_, v)| matches!(v, JsonValue::Array(arr) if is_object_array(arr)))
1476 .count();
1477
1478 if child_count < 8 {
1479 for (child_key, child_value) in obj {
1481 if let JsonValue::Array(child_arr) = child_value {
1482 if is_object_array(child_arr) {
1483 let child_list = json_array_to_matrix_list(
1484 child_arr,
1485 child_key,
1486 config,
1487 structs,
1488 schema_cache,
1489 depth + 1,
1490 )?;
1491 children.insert(child_key.clone(), child_list.rows);
1492 }
1493 }
1494 }
1495 } else {
1496 let mut child_items: Vec<(String, Vec<Node>)> = Vec::with_capacity(child_count);
1498 for (child_key, child_value) in obj {
1499 if let JsonValue::Array(child_arr) = child_value {
1500 if is_object_array(child_arr) {
1501 let child_list = json_array_to_matrix_list(
1502 child_arr,
1503 child_key,
1504 config,
1505 structs,
1506 schema_cache,
1507 depth + 1,
1508 )?;
1509 child_items.push((child_key.clone(), child_list.rows));
1510 }
1511 }
1512 }
1513 child_items.sort_by(|a, b| a.0.cmp(&b.0));
1514 for (key, nodes) in child_items {
1515 children.insert(key, nodes);
1516 }
1517 }
1518
1519 let node = Node {
1520 type_name: type_name.clone(),
1521 id,
1522 fields: fields_vec.into(),
1523 children: if children.is_empty() {
1524 None
1525 } else {
1526 Some(Box::new(children))
1527 },
1528 child_count: 0,
1529 };
1530
1531 rows.push(node);
1532 }
1533 }
1534
1535 let count_hint = Some(arr.len());
1537
1538 Ok(MatrixList {
1539 type_name,
1540 schema,
1541 rows,
1542 count_hint,
1543 })
1544}
1545
1546fn json_to_value(value: &JsonValue, config: &FromJsonConfig) -> Result<Value, JsonConversionError> {
1547 Ok(match value {
1548 JsonValue::Null => Value::Null,
1549 JsonValue::Bool(b) => Value::Bool(*b),
1550 JsonValue::Number(n) => json_number_to_value(n)?,
1551 JsonValue::String(s) => {
1552 if let Some(max_len) = config.max_string_length {
1554 if s.len() > max_len {
1555 return Err(JsonConversionError::MaxStringLengthExceeded(
1556 max_len,
1557 s.len(),
1558 ));
1559 }
1560 }
1561
1562 if s.starts_with("$(") && s.ends_with(')') {
1564 let expr = parse_expression_token(s)
1565 .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
1566 Value::Expression(Box::new(expr))
1567 } else {
1568 Value::String(s.clone().into_boxed_str())
1569 }
1570 }
1571 JsonValue::Array(arr) => {
1572 if let Some(max_size) = config.max_array_size {
1574 if arr.len() > max_size {
1575 return Err(JsonConversionError::MaxArraySizeExceeded(
1576 max_size,
1577 arr.len(),
1578 ));
1579 }
1580 }
1581
1582 if is_object_array(arr) {
1585 Value::Null } else if is_tensor_array(arr) {
1587 let tensor = json_array_to_tensor(arr, config, 0)?;
1588 Value::Tensor(Box::new(tensor))
1589 } else if arr.is_empty() {
1590 Value::Tensor(Box::new(Tensor::Array(vec![])))
1592 } else {
1593 let tensor = json_array_to_tensor(arr, config, 0)?;
1595 Value::Tensor(Box::new(tensor))
1596 }
1597 }
1598 JsonValue::Object(obj) => {
1599 if let Some(JsonValue::String(r)) = obj.get("@ref") {
1600 Value::Reference(parse_reference(r).map_err(JsonConversionError::InvalidReference)?)
1601 } else {
1602 return Err(JsonConversionError::NestedObject);
1603 }
1604 }
1605 })
1606}
1607
1608#[cfg(test)]
1609mod tests {
1610 use super::*;
1611 use serde_json::json;
1612
1613 #[test]
1616 fn test_from_json_config_default() {
1617 let config = FromJsonConfig::default();
1618 assert_eq!(config.default_type_name, "Item");
1619 assert_eq!(config.version, (1, 0));
1620 assert_eq!(config.max_depth, Some(DEFAULT_MAX_DEPTH));
1621 assert_eq!(config.max_array_size, Some(DEFAULT_MAX_ARRAY_SIZE));
1622 assert_eq!(config.max_string_length, Some(DEFAULT_MAX_STRING_LENGTH));
1623 assert_eq!(config.max_object_size, Some(DEFAULT_MAX_OBJECT_SIZE));
1624 assert_eq!(config.max_depth, Some(10_000));
1626 assert_eq!(config.max_array_size, Some(10_000_000));
1627 assert_eq!(config.max_string_length, Some(100 * 1024 * 1024));
1628 assert_eq!(config.max_object_size, Some(100_000));
1629 }
1630
1631 #[test]
1632 fn test_from_json_config_debug() {
1633 let config = FromJsonConfig::default();
1634 let debug = format!("{config:?}");
1635 assert!(debug.contains("FromJsonConfig"));
1636 assert!(debug.contains("default_type_name"));
1637 assert!(debug.contains("version"));
1638 }
1639
1640 #[test]
1641 fn test_from_json_config_clone() {
1642 let config = FromJsonConfig {
1643 default_type_name: "Custom".to_string(),
1644 version: (2, 1),
1645 max_depth: Some(50),
1646 max_array_size: Some(10_000),
1647 max_string_length: Some(1_000_000),
1648 max_object_size: Some(1_000),
1649 surrogate_policy: SurrogatePolicy::Reject,
1650 #[cfg(feature = "lenient")]
1651 lenient: false,
1652 };
1653 let cloned = config.clone();
1654 assert_eq!(cloned.default_type_name, "Custom");
1655 assert_eq!(cloned.version, (2, 1));
1656 assert_eq!(cloned.max_depth, Some(50));
1657 }
1658
1659 #[test]
1662 fn test_builder_default() {
1663 let config = FromJsonConfig::builder().build();
1664 assert_eq!(config.default_type_name, "Item");
1665 assert_eq!(config.version, (1, 0));
1666 assert_eq!(config.max_depth, Some(DEFAULT_MAX_DEPTH));
1667 assert_eq!(config.max_array_size, Some(DEFAULT_MAX_ARRAY_SIZE));
1668 assert_eq!(config.max_string_length, Some(DEFAULT_MAX_STRING_LENGTH));
1669 assert_eq!(config.max_object_size, Some(DEFAULT_MAX_OBJECT_SIZE));
1670 }
1671
1672 #[test]
1673 fn test_builder_custom_limits() {
1674 let config = FromJsonConfig::builder()
1675 .max_depth(1_000)
1676 .max_array_size(100_000)
1677 .max_string_length(10 * 1024 * 1024)
1678 .max_object_size(5_000)
1679 .build();
1680
1681 assert_eq!(config.max_depth, Some(1_000));
1682 assert_eq!(config.max_array_size, Some(100_000));
1683 assert_eq!(config.max_string_length, Some(10 * 1024 * 1024));
1684 assert_eq!(config.max_object_size, Some(5_000));
1685 }
1686
1687 #[test]
1688 fn test_builder_unlimited() {
1689 let config = FromJsonConfig::builder().unlimited().build();
1690
1691 assert_eq!(config.max_depth, None);
1692 assert_eq!(config.max_array_size, None);
1693 assert_eq!(config.max_string_length, None);
1694 assert_eq!(config.max_object_size, None);
1695 }
1696
1697 #[test]
1698 fn test_builder_custom_type_and_version() {
1699 let config = FromJsonConfig::builder()
1700 .default_type_name("CustomType")
1701 .version(2, 1)
1702 .build();
1703
1704 assert_eq!(config.default_type_name, "CustomType");
1705 assert_eq!(config.version, (2, 1));
1706 }
1707
1708 #[test]
1709 fn test_builder_chaining() {
1710 let config = FromJsonConfig::builder()
1711 .default_type_name("Entity")
1712 .version(1, 5)
1713 .max_depth(500)
1714 .max_array_size(50_000)
1715 .max_string_length(5 * 1024 * 1024)
1716 .max_object_size(2_500)
1717 .build();
1718
1719 assert_eq!(config.default_type_name, "Entity");
1720 assert_eq!(config.version, (1, 5));
1721 assert_eq!(config.max_depth, Some(500));
1722 assert_eq!(config.max_array_size, Some(50_000));
1723 assert_eq!(config.max_string_length, Some(5 * 1024 * 1024));
1724 assert_eq!(config.max_object_size, Some(2_500));
1725 }
1726
1727 #[test]
1730 fn test_parse_reference_qualified() {
1731 let r = parse_reference("@User:123").unwrap();
1732 assert_eq!(r.type_name, Some("User".to_string().into()));
1733 assert_eq!(r.id, "123".into());
1734 }
1735
1736 #[test]
1737 fn test_parse_reference_local() {
1738 let r = parse_reference("@123").unwrap();
1739 assert_eq!(r.type_name, None);
1740 assert_eq!(r.id, "123".into());
1741 }
1742
1743 #[test]
1744 fn test_parse_reference_invalid() {
1745 let result = parse_reference("User:123");
1746 assert!(result.is_err());
1747 }
1748
1749 #[test]
1752 fn test_is_tensor_array_numbers() {
1753 let arr = vec![json!(1), json!(2), json!(3)];
1754 assert!(is_tensor_array(&arr));
1755 }
1756
1757 #[test]
1758 fn test_is_tensor_array_nested() {
1759 let arr = vec![json!([1, 2]), json!([3, 4])];
1760 assert!(is_tensor_array(&arr));
1761 }
1762
1763 #[test]
1764 fn test_is_tensor_array_empty() {
1765 let arr: Vec<JsonValue> = vec![];
1766 assert!(!is_tensor_array(&arr));
1767 }
1768
1769 #[test]
1770 fn test_is_tensor_array_with_strings() {
1771 let arr = vec![json!(1), json!("not a tensor")];
1772 assert!(!is_tensor_array(&arr));
1773 }
1774
1775 #[test]
1776 fn test_is_tensor_array_with_objects() {
1777 let arr = vec![json!({"id": 1})];
1778 assert!(!is_tensor_array(&arr));
1779 }
1780
1781 #[test]
1784 fn test_is_object_array_true() {
1785 let arr = vec![json!({"id": 1}), json!({"id": 2})];
1786 assert!(is_object_array(&arr));
1787 }
1788
1789 #[test]
1790 fn test_is_object_array_empty() {
1791 let arr: Vec<JsonValue> = vec![];
1792 assert!(!is_object_array(&arr));
1793 }
1794
1795 #[test]
1796 fn test_is_object_array_mixed() {
1797 let arr = vec![json!({"id": 1}), json!(123)];
1798 assert!(!is_object_array(&arr));
1799 }
1800
1801 #[test]
1804 fn test_json_array_to_tensor_1d() {
1805 let arr = vec![json!(1.0), json!(2.0), json!(3.0)];
1806 let config = FromJsonConfig::default();
1807 let tensor = json_array_to_tensor(&arr, &config, 0).unwrap();
1808 assert_eq!(tensor.flatten(), vec![1.0, 2.0, 3.0]);
1809 }
1810
1811 #[test]
1812 fn test_json_array_to_tensor_2d() {
1813 let arr = vec![json!([1.0, 2.0]), json!([3.0, 4.0])];
1814 let config = FromJsonConfig::default();
1815 let tensor = json_array_to_tensor(&arr, &config, 0).unwrap();
1816 assert_eq!(tensor.flatten(), vec![1.0, 2.0, 3.0, 4.0]);
1817 }
1818
1819 #[test]
1820 fn test_json_array_to_tensor_invalid_element() {
1821 let arr = vec![json!(1.0), json!("not a number")];
1822 let config = FromJsonConfig::default();
1823 let result = json_array_to_tensor(&arr, &config, 0);
1824 assert!(result.is_err());
1825 }
1826
1827 #[test]
1830 fn test_json_to_value_null() {
1831 let config = FromJsonConfig::default();
1832 let result = json_to_value(&JsonValue::Null, &config).unwrap();
1833 assert!(matches!(result, Value::Null));
1834 }
1835
1836 #[test]
1837 fn test_json_to_value_bool() {
1838 let config = FromJsonConfig::default();
1839 let result = json_to_value(&json!(true), &config).unwrap();
1840 assert!(matches!(result, Value::Bool(true)));
1841
1842 let result = json_to_value(&json!(false), &config).unwrap();
1843 assert!(matches!(result, Value::Bool(false)));
1844 }
1845
1846 #[test]
1847 fn test_json_to_value_int() {
1848 let config = FromJsonConfig::default();
1849 let result = json_to_value(&json!(42), &config).unwrap();
1850 assert!(matches!(result, Value::Int(42)));
1851 }
1852
1853 #[test]
1854 fn test_json_to_value_float() {
1855 let config = FromJsonConfig::default();
1856 let result = json_to_value(&json!(3.5), &config).unwrap();
1857 if let Value::Float(f) = result {
1858 assert!((f - 3.5).abs() < 0.001);
1859 } else {
1860 panic!("Expected Float");
1861 }
1862 }
1863
1864 #[test]
1865 fn test_json_to_value_string() {
1866 let config = FromJsonConfig::default();
1867 let result = json_to_value(&json!("hello"), &config).unwrap();
1868 assert!(matches!(result, Value::String(ref s) if s.as_ref() == "hello"));
1869 }
1870
1871 #[test]
1872 fn test_json_to_value_expression() {
1873 let config = FromJsonConfig::default();
1874 let result = json_to_value(&json!("$(foo)"), &config).unwrap();
1875 assert!(matches!(result, Value::Expression(_)));
1876 }
1877
1878 #[test]
1879 fn test_json_to_value_tensor() {
1880 let config = FromJsonConfig::default();
1881 let result = json_to_value(&json!([1.0, 2.0, 3.0]), &config).unwrap();
1882 if let Value::Tensor(t) = result {
1883 assert_eq!(t.flatten(), vec![1.0, 2.0, 3.0]);
1884 } else {
1885 panic!("Expected Tensor");
1886 }
1887 }
1888
1889 #[test]
1890 fn test_json_to_value_reference() {
1891 let config = FromJsonConfig::default();
1892 let result = json_to_value(&json!({"@ref": "@User:123"}), &config).unwrap();
1893 if let Value::Reference(r) = result {
1894 assert_eq!(r.type_name, Some("User".to_string().into()));
1895 assert_eq!(r.id, "123".into());
1896 } else {
1897 panic!("Expected Reference");
1898 }
1899 }
1900
1901 #[test]
1902 fn test_json_to_value_nested_object_error() {
1903 let config = FromJsonConfig::default();
1904 let result = json_to_value(&json!({"key": "value"}), &config);
1905 assert!(result.is_err());
1906 }
1907
1908 #[test]
1911 fn test_from_json_empty_object() {
1912 let json = "{}";
1913 let config = FromJsonConfig::default();
1914 let doc = from_json(json, &config).unwrap();
1915 assert!(doc.root.is_empty());
1916 assert_eq!(doc.version, (1, 0));
1917 }
1918
1919 #[test]
1920 fn test_from_json_simple_scalars() {
1921 let json = r#"{"name": "test", "count": 42, "active": true}"#;
1922 let config = FromJsonConfig::default();
1923 let doc = from_json(json, &config).unwrap();
1924 assert!(doc.root.contains_key("name"));
1925 assert!(doc.root.contains_key("count"));
1926 assert!(doc.root.contains_key("active"));
1927 }
1928
1929 #[test]
1930 fn test_from_json_nested_object() {
1931 let json = r#"{"outer": {"inner": "value"}}"#;
1932 let config = FromJsonConfig::default();
1933 let doc = from_json(json, &config).unwrap();
1934 if let Item::Object(obj) = &doc.root["outer"] {
1935 assert!(obj.contains_key("inner"));
1936 } else {
1937 panic!("Expected Object");
1938 }
1939 }
1940
1941 #[test]
1942 fn test_from_json_array_of_objects() {
1943 let json = r#"{"users": [{"id": "1", "name": "Alice"}]}"#;
1944 let config = FromJsonConfig::default();
1945 let doc = from_json(json, &config).unwrap();
1946 if let Item::List(list) = &doc.root["users"] {
1947 assert_eq!(list.type_name, "User");
1948 assert_eq!(list.rows.len(), 1);
1949 } else {
1950 panic!("Expected List");
1951 }
1952 }
1953
1954 #[test]
1955 fn test_from_json_tensor() {
1956 let json = r#"{"data": [1, 2, 3]}"#;
1957 let config = FromJsonConfig::default();
1958 let doc = from_json(json, &config).unwrap();
1959 if let Item::Scalar(Value::Tensor(t)) = &doc.root["data"] {
1960 assert_eq!(t.flatten(), vec![1.0, 2.0, 3.0]);
1961 } else {
1962 panic!("Expected Tensor");
1963 }
1964 }
1965
1966 #[test]
1967 fn test_from_json_invalid_json() {
1968 let json = "not valid json";
1969 let config = FromJsonConfig::default();
1970 let result = from_json(json, &config);
1971 assert!(result.is_err());
1972 }
1973
1974 #[test]
1975 fn test_from_json_non_object_root() {
1976 let json = "[1, 2, 3]";
1977 let config = FromJsonConfig::default();
1978 let result = from_json(json, &config);
1979 assert!(result.is_err());
1980 }
1981
1982 #[test]
1985 fn test_from_json_value_simple() {
1986 let value = json!({"key": 42});
1987 let config = FromJsonConfig::default();
1988 let doc = from_json_value(&value, &config).unwrap();
1989 if let Item::Scalar(Value::Int(n)) = &doc.root["key"] {
1990 assert_eq!(*n, 42);
1991 } else {
1992 panic!("Expected Int");
1993 }
1994 }
1995
1996 #[test]
1999 fn test_json_value_to_item_null() {
2000 let config = FromJsonConfig::default();
2001 let mut structs = BTreeMap::new();
2002 let mut schema_cache = SchemaCache::new();
2003 let result = json_value_to_item(
2004 &JsonValue::Null,
2005 "test",
2006 &config,
2007 &mut structs,
2008 &mut schema_cache,
2009 0,
2010 )
2011 .unwrap();
2012 assert!(matches!(result, Item::Scalar(Value::Null)));
2013 }
2014
2015 #[test]
2016 fn test_json_value_to_item_bool() {
2017 let config = FromJsonConfig::default();
2018 let mut structs = BTreeMap::new();
2019 let mut schema_cache = SchemaCache::new();
2020 let result = json_value_to_item(
2021 &json!(true),
2022 "test",
2023 &config,
2024 &mut structs,
2025 &mut schema_cache,
2026 0,
2027 )
2028 .unwrap();
2029 assert!(matches!(result, Item::Scalar(Value::Bool(true))));
2030 }
2031
2032 #[test]
2033 fn test_json_value_to_item_empty_array() {
2034 let config = FromJsonConfig::default();
2035 let mut structs = BTreeMap::new();
2036 let mut schema_cache = SchemaCache::new();
2037 let result = json_value_to_item(
2038 &json!([]),
2039 "items",
2040 &config,
2041 &mut structs,
2042 &mut schema_cache,
2043 0,
2044 )
2045 .unwrap();
2046 if let Item::List(list) = result {
2047 assert!(list.rows.is_empty());
2048 assert_eq!(list.type_name, "Item");
2049 } else {
2050 panic!("Expected List");
2051 }
2052 }
2053
2054 #[test]
2057 fn test_schema_inference_id_first() {
2058 let json = r#"{"users": [{"name": "Alice", "id": "1", "age": 30}]}"#;
2059 let config = FromJsonConfig::default();
2060 let doc = from_json(json, &config).unwrap();
2061 if let Item::List(list) = &doc.root["users"] {
2062 assert_eq!(list.schema[0], "id"); } else {
2064 panic!("Expected List");
2065 }
2066 }
2067
2068 #[test]
2069 fn test_struct_registration() {
2070 let json = r#"{"users": [{"id": "1"}]}"#;
2071 let config = FromJsonConfig::default();
2072 let doc = from_json(json, &config).unwrap();
2073 assert!(doc.structs.contains_key("User"));
2074 }
2075
2076 #[test]
2079 fn test_max_depth_exceeded() {
2080 let json = r#"{"a":1}"#;
2083
2084 let config = FromJsonConfig {
2085 default_type_name: "Item".to_string(),
2086 version: (1, 0),
2087 max_depth: Some(0), max_array_size: Some(100_000),
2089 max_string_length: Some(10_000_000),
2090 max_object_size: Some(10_000),
2091 surrogate_policy: SurrogatePolicy::Reject,
2092 #[cfg(feature = "lenient")]
2093 lenient: false,
2094 };
2095
2096 let result = from_json(json, &config);
2097 assert!(result.is_err(), "Expected error for depth 0");
2098 let err_msg = result.unwrap_err().to_string();
2099 assert!(err_msg.contains("Maximum recursion depth"));
2100 }
2101
2102 #[test]
2103 fn test_max_array_size_exceeded() {
2104 let config = FromJsonConfig {
2105 default_type_name: "Item".to_string(),
2106 version: (1, 0),
2107 max_depth: Some(100),
2108 max_array_size: Some(10), max_string_length: Some(10_000_000),
2110 max_object_size: Some(10_000),
2111 surrogate_policy: SurrogatePolicy::Reject,
2112 #[cfg(feature = "lenient")]
2113 lenient: false,
2114 };
2115
2116 let json = r#"{"items": [1,2,3,4,5,6,7,8,9,10,11]}"#;
2118 let result = from_json(json, &config);
2119 assert!(result.is_err());
2120 let err_msg = result.unwrap_err().to_string();
2121 assert!(err_msg.contains("Maximum array size"));
2122 }
2123
2124 #[test]
2125 fn test_max_string_length_exceeded() {
2126 let config = FromJsonConfig {
2127 default_type_name: "Item".to_string(),
2128 version: (1, 0),
2129 max_depth: Some(100),
2130 max_array_size: Some(100_000),
2131 max_string_length: Some(100), max_object_size: Some(10_000),
2133 surrogate_policy: SurrogatePolicy::Reject,
2134 #[cfg(feature = "lenient")]
2135 lenient: false,
2136 };
2137
2138 let long_string = "a".repeat(101);
2140 let json = format!(r#"{{"text": "{long_string}"}}"#);
2141 let result = from_json(&json, &config);
2142 assert!(result.is_err());
2143 let err_msg = result.unwrap_err().to_string();
2144 assert!(err_msg.contains("Maximum string length"));
2145 }
2146
2147 #[test]
2148 fn test_max_object_size_exceeded() {
2149 let config = FromJsonConfig {
2150 default_type_name: "Item".to_string(),
2151 version: (1, 0),
2152 max_depth: Some(100),
2153 max_array_size: Some(100_000),
2154 max_string_length: Some(10_000_000),
2155 max_object_size: Some(5), surrogate_policy: SurrogatePolicy::Reject,
2157 #[cfg(feature = "lenient")]
2158 lenient: false,
2159 };
2160
2161 let json = r#"{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6}"#;
2163 let result = from_json(json, &config);
2164 assert!(result.is_err());
2165 let err_msg = result.unwrap_err().to_string();
2166 assert!(err_msg.contains("Maximum object size"));
2167 }
2168
2169 #[test]
2170 fn test_limits_disabled() {
2171 let config = FromJsonConfig {
2172 default_type_name: "Item".to_string(),
2173 version: (1, 0),
2174 max_depth: None, max_array_size: None,
2176 max_string_length: None,
2177 max_object_size: None,
2178 surrogate_policy: SurrogatePolicy::Reject,
2179 #[cfg(feature = "lenient")]
2180 lenient: false,
2181 };
2182
2183 let long_string = "a".repeat(1000);
2185 let json = format!(r#"{{"text": "{long_string}"}}"#);
2186 let result = from_json(&json, &config);
2187 assert!(result.is_ok());
2188 }
2189
2190 #[test]
2191 fn test_error_message_quality() {
2192 let config = FromJsonConfig::default();
2193
2194 let result1 = from_json("not json", &config);
2196 assert!(result1
2197 .unwrap_err()
2198 .to_string()
2199 .contains("JSON parse error"));
2200
2201 let result2 = from_json("[1,2,3]", &config);
2202 assert!(result2.unwrap_err().to_string().contains("Root must be"));
2203
2204 let result3 = from_json(r#"{"ref": {"@ref": "bad"}}"#, &config);
2205 assert!(result3.is_err()); }
2207}
2208
/// Policy deciding when partial parsing gives up after an error has been recorded.
///
/// The policy is evaluated each time an error is recorded by the partial parser.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ErrorTolerance {
    /// Stop as soon as the first error has been recorded (the default).
    #[default]
    StopOnFirst,

    /// Stop once the number of recorded errors reaches the given count.
    MaxErrors(usize),

    /// Never stop because of recorded errors.
    CollectAll,

    /// Continue past non-fatal errors; stop when a fatal error is recorded.
    SkipInvalidItems,
}
2231
/// Position of an error inside the JSON input, expressed as a path string plus nesting depth.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ErrorLocation {
    /// Path from the root, e.g. `$.users[2].name` (`$` is the root).
    pub path: String,

    /// Number of `child`/`index` steps taken from the root.
    pub depth: usize,
}

impl ErrorLocation {
    /// Location of the document root: path `$`, depth 0.
    fn root() -> Self {
        ErrorLocation {
            path: String::from("$"),
            depth: 0,
        }
    }

    /// Location of the object member `key` below this location (`<path>.<key>`).
    fn child(&self, key: &str) -> Self {
        let mut path = String::with_capacity(self.path.len() + 1 + key.len());
        path.push_str(&self.path);
        path.push('.');
        path.push_str(key);

        ErrorLocation {
            path,
            depth: self.depth + 1,
        }
    }

    /// Location of array element `idx` below this location (`<path>[<idx>]`).
    fn index(&self, idx: usize) -> Self {
        ErrorLocation {
            path: format!("{base}[{idx}]", base = self.path),
            depth: self.depth + 1,
        }
    }
}
2264
2265#[derive(Debug, Clone)]
2267pub struct ParseError {
2268 pub error: JsonConversionError,
2270
2271 pub location: ErrorLocation,
2273
2274 pub is_fatal: bool,
2276}
2277
2278impl ParseError {
2279 fn new(error: JsonConversionError, location: ErrorLocation, is_fatal: bool) -> Self {
2280 Self {
2281 error,
2282 location,
2283 is_fatal,
2284 }
2285 }
2286}
2287
2288#[derive(Debug, Clone, Default)]
2290pub struct PartialConfig {
2291 pub from_json_config: FromJsonConfig,
2293
2294 pub tolerance: ErrorTolerance,
2296
2297 pub include_partial_on_fatal: bool,
2299
2300 pub replace_invalid_with_null: bool,
2302}
2303
2304impl PartialConfig {
2305 #[must_use]
2307 pub fn builder() -> PartialConfigBuilder {
2308 PartialConfigBuilder::default()
2309 }
2310}
2311
2312#[derive(Debug, Clone, Default)]
2314pub struct PartialConfigBuilder {
2315 from_json_config: FromJsonConfig,
2316 tolerance: ErrorTolerance,
2317 include_partial_on_fatal: bool,
2318 replace_invalid_with_null: bool,
2319}
2320
2321impl PartialConfigBuilder {
2322 #[must_use]
2324 pub fn from_json_config(mut self, config: FromJsonConfig) -> Self {
2325 self.from_json_config = config;
2326 self
2327 }
2328
2329 #[must_use]
2331 pub fn tolerance(mut self, tolerance: ErrorTolerance) -> Self {
2332 self.tolerance = tolerance;
2333 self
2334 }
2335
2336 #[must_use]
2338 pub fn include_partial_on_fatal(mut self, value: bool) -> Self {
2339 self.include_partial_on_fatal = value;
2340 self
2341 }
2342
2343 #[must_use]
2345 pub fn replace_invalid_with_null(mut self, value: bool) -> Self {
2346 self.replace_invalid_with_null = value;
2347 self
2348 }
2349
2350 #[must_use]
2352 pub fn build(self) -> PartialConfig {
2353 PartialConfig {
2354 from_json_config: self.from_json_config,
2355 tolerance: self.tolerance,
2356 include_partial_on_fatal: self.include_partial_on_fatal,
2357 replace_invalid_with_null: self.replace_invalid_with_null,
2358 }
2359 }
2360}
2361
2362#[derive(Debug)]
2364pub struct PartialResult {
2365 pub document: Option<Document>,
2367
2368 pub errors: Vec<ParseError>,
2370
2371 pub stopped_early: bool,
2373}
2374
2375impl PartialResult {
2376 #[must_use]
2378 pub fn is_complete(&self) -> bool {
2379 self.errors.is_empty() && self.document.is_some()
2380 }
2381
2382 #[must_use]
2384 pub fn is_failed(&self) -> bool {
2385 self.errors.iter().any(|e| e.is_fatal) || self.document.is_none()
2386 }
2387
2388 pub fn into_result(self) -> Result<Document, Vec<ParseError>> {
2390 if self.errors.is_empty() {
2391 self.document.ok_or_else(Vec::new)
2392 } else {
2393 Err(self.errors)
2394 }
2395 }
2396}
2397
2398struct ErrorContext {
2400 errors: Vec<ParseError>,
2401 config: PartialConfig,
2402 stopped: bool,
2403}
2404
2405impl ErrorContext {
2406 fn new(config: PartialConfig) -> Self {
2407 Self {
2408 errors: Vec::new(),
2409 config,
2410 stopped: false,
2411 }
2412 }
2413
2414 fn record_error(
2416 &mut self,
2417 error: JsonConversionError,
2418 location: ErrorLocation,
2419 is_fatal: bool,
2420 ) -> bool {
2421 if self.stopped {
2422 return false;
2423 }
2424
2425 let parse_error = ParseError::new(error, location, is_fatal);
2426 self.errors.push(parse_error);
2427
2428 let should_stop = match self.config.tolerance {
2430 ErrorTolerance::StopOnFirst => true,
2431 ErrorTolerance::MaxErrors(max) => self.errors.len() >= max,
2432 ErrorTolerance::CollectAll => false,
2433 ErrorTolerance::SkipInvalidItems => is_fatal,
2434 };
2435
2436 if should_stop {
2437 self.stopped = true;
2438 }
2439
2440 !should_stop
2441 }
2442
2443 fn should_continue(&self) -> bool {
2444 !self.stopped
2445 }
2446}
2447
2448#[must_use]
2468pub fn partial_parse_json(json: &str, config: &PartialConfig) -> PartialResult {
2469 let value = match serde_json::from_str::<JsonValue>(json) {
2471 Ok(v) => v,
2472 Err(e) => {
2473 return PartialResult {
2475 document: None,
2476 errors: vec![ParseError::new(
2477 JsonConversionError::ParseError(e.to_string()),
2478 ErrorLocation::root(),
2479 true,
2480 )],
2481 stopped_early: false,
2482 };
2483 }
2484 };
2485
2486 partial_parse_json_value(&value, config)
2487}
2488
2489#[must_use]
2491pub fn partial_parse_json_value(value: &JsonValue, config: &PartialConfig) -> PartialResult {
2492 let mut context = ErrorContext::new(config.clone());
2493 let mut structs = BTreeMap::new();
2494 let mut schema_cache = SchemaCache::new();
2495
2496 let root = if let JsonValue::Object(map) = value {
2498 match partial_json_object_to_root(
2499 map,
2500 &config.from_json_config,
2501 &mut structs,
2502 &mut schema_cache,
2503 0,
2504 &ErrorLocation::root(),
2505 &mut context,
2506 ) {
2507 Ok(root) => Some(root),
2508 Err(_) => {
2509 if config.include_partial_on_fatal {
2510 Some(BTreeMap::new())
2511 } else {
2512 None
2513 }
2514 }
2515 }
2516 } else {
2517 context.record_error(
2518 JsonConversionError::InvalidRoot(format!("{value:?}")),
2519 ErrorLocation::root(),
2520 true,
2521 );
2522 None
2523 };
2524
2525 let document = root.map(|root| Document {
2526 version: config.from_json_config.version,
2527 schema_versions: BTreeMap::new(),
2528 aliases: BTreeMap::new(),
2529 structs,
2530 nests: BTreeMap::new(),
2531 root,
2532 });
2533
2534 PartialResult {
2535 document,
2536 errors: context.errors,
2537 stopped_early: context.stopped,
2538 }
2539}
2540
2541fn partial_json_object_to_root(
2543 map: &Map<String, JsonValue>,
2544 config: &FromJsonConfig,
2545 structs: &mut BTreeMap<String, Vec<String>>,
2546 schema_cache: &mut SchemaCache,
2547 depth: usize,
2548 location: &ErrorLocation,
2549 context: &mut ErrorContext,
2550) -> Result<BTreeMap<String, Item>, JsonConversionError> {
2551 if let Some(max_size) = config.max_object_size {
2553 if map.len() > max_size {
2554 let err = JsonConversionError::MaxObjectSizeExceeded(max_size, map.len());
2555 context.record_error(err.clone(), location.clone(), false);
2556 return Err(err);
2557 }
2558 }
2559
2560 let mut result = BTreeMap::new();
2561
2562 for (key, value) in map {
2563 if !context.should_continue() {
2564 break;
2565 }
2566
2567 if key.starts_with("__") {
2569 continue;
2570 }
2571
2572 let item_location = location.child(key);
2573 match partial_json_value_to_item(
2574 value,
2575 key,
2576 config,
2577 structs,
2578 schema_cache,
2579 depth,
2580 &item_location,
2581 context,
2582 ) {
2583 Ok(item) => {
2584 result.insert(key.clone(), item);
2585 }
2586 Err(_) => {
2587 if context.config.replace_invalid_with_null {
2589 result.insert(key.clone(), Item::Scalar(Value::Null));
2590 }
2591 }
2593 }
2594 }
2595
2596 Ok(result)
2597}
2598
2599#[allow(clippy::too_many_arguments)]
2601fn partial_json_value_to_item(
2602 value: &JsonValue,
2603 key: &str,
2604 config: &FromJsonConfig,
2605 structs: &mut BTreeMap<String, Vec<String>>,
2606 schema_cache: &mut SchemaCache,
2607 depth: usize,
2608 location: &ErrorLocation,
2609 context: &mut ErrorContext,
2610) -> Result<Item, JsonConversionError> {
2611 if let Some(max_depth) = config.max_depth {
2613 if depth >= max_depth {
2614 let err = JsonConversionError::MaxDepthExceeded(max_depth);
2615 context.record_error(err.clone(), location.clone(), false);
2616 return Err(err);
2617 }
2618 }
2619
2620 match value {
2621 JsonValue::Null => Ok(Item::Scalar(Value::Null)),
2622 JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(*b))),
2623 JsonValue::Number(n) => match json_number_to_value(n) {
2624 Ok(value) => Ok(Item::Scalar(value)),
2625 Err(err) => {
2626 context.record_error(err.clone(), location.clone(), false);
2627 Err(err)
2628 }
2629 },
2630 JsonValue::String(s) => {
2631 if let Some(max_len) = config.max_string_length {
2633 if s.len() > max_len {
2634 let err = JsonConversionError::MaxStringLengthExceeded(max_len, s.len());
2635 context.record_error(err.clone(), location.clone(), false);
2636 return Err(err);
2637 }
2638 }
2639
2640 if s.starts_with("$(") && s.ends_with(')') {
2642 match parse_expression_token(s) {
2643 Ok(expr) => Ok(Item::Scalar(Value::Expression(Box::new(expr)))),
2644 Err(e) => {
2645 let err = JsonConversionError::InvalidExpression(e.to_string());
2646 context.record_error(err.clone(), location.clone(), false);
2647 Err(err)
2648 }
2649 }
2650 } else {
2651 Ok(Item::Scalar(Value::String(s.clone().into_boxed_str())))
2652 }
2653 }
2654 JsonValue::Array(arr) => {
2655 if let Some(max_size) = config.max_array_size {
2657 if arr.len() > max_size {
2658 let err = JsonConversionError::MaxArraySizeExceeded(max_size, arr.len());
2659 context.record_error(err.clone(), location.clone(), false);
2660 return Err(err);
2661 }
2662 }
2663
2664 if arr.is_empty() {
2666 let type_name = singularize_and_capitalize(key);
2667 let schema: Vec<String> = DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect();
2668 let mut list = MatrixList::new(type_name.clone(), schema.clone());
2669 list.count_hint = Some(0);
2670 structs.insert(type_name, schema);
2671 Ok(Item::List(list))
2672 } else if is_tensor_array(arr) {
2673 match partial_json_array_to_tensor(arr, config, depth + 1, location, context) {
2674 Ok(tensor) => Ok(Item::Scalar(Value::Tensor(Box::new(tensor)))),
2675 Err(err) => Err(err),
2676 }
2677 } else if is_object_array(arr) {
2678 match partial_json_array_to_matrix_list(
2679 arr,
2680 key,
2681 config,
2682 structs,
2683 schema_cache,
2684 depth + 1,
2685 location,
2686 context,
2687 ) {
2688 Ok(list) => Ok(Item::List(list)),
2689 Err(err) => Err(err),
2690 }
2691 } else {
2692 match partial_json_array_to_tensor(arr, config, depth + 1, location, context) {
2694 Ok(tensor) => Ok(Item::Scalar(Value::Tensor(Box::new(tensor)))),
2695 Err(err) => Err(err),
2696 }
2697 }
2698 }
2699 JsonValue::Object(obj) => {
2700 if let Some(JsonValue::String(r)) = obj.get("@ref") {
2702 match parse_reference(r) {
2703 Ok(reference) => Ok(Item::Scalar(Value::Reference(reference))),
2704 Err(e) => {
2705 let err = JsonConversionError::InvalidReference(e);
2706 context.record_error(err.clone(), location.clone(), false);
2707 Err(err)
2708 }
2709 }
2710 } else {
2711 match partial_json_object_to_item_map(
2713 obj,
2714 config,
2715 structs,
2716 schema_cache,
2717 depth + 1,
2718 location,
2719 context,
2720 ) {
2721 Ok(item_map) => Ok(Item::Object(item_map)),
2722 Err(err) => Err(err),
2723 }
2724 }
2725 }
2726 }
2727}
2728
2729fn partial_json_object_to_item_map(
2731 map: &Map<String, JsonValue>,
2732 config: &FromJsonConfig,
2733 structs: &mut BTreeMap<String, Vec<String>>,
2734 schema_cache: &mut SchemaCache,
2735 depth: usize,
2736 location: &ErrorLocation,
2737 context: &mut ErrorContext,
2738) -> Result<BTreeMap<String, Item>, JsonConversionError> {
2739 if let Some(max_size) = config.max_object_size {
2741 if map.len() > max_size {
2742 let err = JsonConversionError::MaxObjectSizeExceeded(max_size, map.len());
2743 context.record_error(err.clone(), location.clone(), false);
2744 return Err(err);
2745 }
2746 }
2747
2748 let mut result = BTreeMap::new();
2749
2750 for (key, value) in map {
2751 if !context.should_continue() {
2752 break;
2753 }
2754
2755 if key.starts_with("__") {
2756 continue;
2757 }
2758
2759 let item_location = location.child(key);
2760 match partial_json_value_to_item(
2761 value,
2762 key,
2763 config,
2764 structs,
2765 schema_cache,
2766 depth,
2767 &item_location,
2768 context,
2769 ) {
2770 Ok(item) => {
2771 result.insert(key.clone(), item);
2772 }
2773 Err(_) => {
2774 if context.config.replace_invalid_with_null {
2775 result.insert(key.clone(), Item::Scalar(Value::Null));
2776 }
2777 }
2778 }
2779 }
2780
2781 Ok(result)
2782}
2783
2784fn partial_json_array_to_tensor(
2786 arr: &[JsonValue],
2787 config: &FromJsonConfig,
2788 depth: usize,
2789 location: &ErrorLocation,
2790 context: &mut ErrorContext,
2791) -> Result<Tensor, JsonConversionError> {
2792 if let Some(max_depth) = config.max_depth {
2794 if depth >= max_depth {
2795 let err = JsonConversionError::MaxDepthExceeded(max_depth);
2796 context.record_error(err.clone(), location.clone(), false);
2797 return Err(err);
2798 }
2799 }
2800
2801 let mut items = Vec::with_capacity(arr.len());
2802
2803 for (idx, v) in arr.iter().enumerate() {
2804 if !context.should_continue() {
2805 break;
2806 }
2807
2808 let elem_location = location.index(idx);
2809 let tensor = match v {
2810 JsonValue::Number(n) => {
2811 if is_integer_overflow(n) {
2814 }
2817
2818 if let Some(f) = n.as_f64() {
2819 Ok(Tensor::Scalar(f))
2820 } else {
2821 let err = JsonConversionError::InvalidNumber(n.to_string());
2822 context.record_error(err.clone(), elem_location, false);
2823 Err(err)
2824 }
2825 }
2826 JsonValue::Array(nested) => {
2827 partial_json_array_to_tensor(nested, config, depth + 1, &elem_location, context)
2828 }
2829 _ => {
2830 let err = JsonConversionError::InvalidTensor;
2831 context.record_error(err.clone(), elem_location, false);
2832 Err(err)
2833 }
2834 };
2835
2836 match tensor {
2837 Ok(t) => items.push(t),
2838 Err(_) => {
2839 if context.config.replace_invalid_with_null {
2840 items.push(Tensor::Scalar(0.0));
2841 }
2842 }
2844 }
2845 }
2846
2847 Ok(Tensor::Array(items))
2848}
2849
2850#[allow(clippy::too_many_arguments)]
2852fn partial_json_array_to_matrix_list(
2853 arr: &[JsonValue],
2854 key: &str,
2855 config: &FromJsonConfig,
2856 structs: &mut BTreeMap<String, Vec<String>>,
2857 schema_cache: &mut SchemaCache,
2858 depth: usize,
2859 location: &ErrorLocation,
2860 context: &mut ErrorContext,
2861) -> Result<MatrixList, JsonConversionError> {
2862 if let Some(max_depth) = config.max_depth {
2864 if depth >= max_depth {
2865 let err = JsonConversionError::MaxDepthExceeded(max_depth);
2866 context.record_error(err.clone(), location.clone(), false);
2867 return Err(err);
2868 }
2869 }
2870
2871 let type_name = singularize_and_capitalize(key);
2872
2873 let schema: Vec<String> = if let Some(JsonValue::Object(first)) = arr.first() {
2875 if let Some(JsonValue::Array(schema_arr)) = first.get("__hedl_schema") {
2876 schema_arr
2877 .iter()
2878 .filter_map(|v| v.as_str().map(String::from))
2879 .collect()
2880 } else {
2881 let mut cache_key: Vec<String> = first
2882 .keys()
2883 .filter(|k| {
2884 if k.starts_with("__") {
2885 return false;
2886 }
2887 if let Some(JsonValue::Array(arr)) = first.get(*k) {
2888 !is_object_array(arr)
2889 } else {
2890 true
2891 }
2892 })
2893 .cloned()
2894 .collect();
2895 cache_key.sort();
2896
2897 if let Some(cached_schema) = schema_cache.get(&cache_key) {
2898 cached_schema.clone()
2899 } else {
2900 let mut keys = cache_key.clone();
2901 if let Some(pos) = keys.iter().position(|k| k == "id") {
2902 keys.remove(pos);
2903 keys.insert(0, "id".to_string());
2904 }
2905 schema_cache.insert(cache_key, keys.clone());
2906 keys
2907 }
2908 }
2909 } else {
2910 DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
2911 };
2912
2913 let schema = if schema.is_empty() {
2914 DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
2915 } else {
2916 schema
2917 };
2918
2919 structs.insert(type_name.clone(), schema.clone());
2920
2921 let mut rows = Vec::with_capacity(arr.len());
2922
2923 for (idx, item) in arr.iter().enumerate() {
2924 if !context.should_continue() {
2925 break;
2926 }
2927
2928 let row_location = location.index(idx);
2929
2930 if let JsonValue::Object(obj) = item {
2931 let id = obj
2932 .get(&schema[0])
2933 .and_then(|v| v.as_str())
2934 .unwrap_or("")
2935 .to_string();
2936
2937 let mut fields = Vec::with_capacity(schema.len());
2938 for col in &schema {
2939 match obj.get(col) {
2940 Some(v) => {
2941 match partial_json_to_value(v, config, &row_location.child(col), context) {
2942 Ok(value) => fields.push(value),
2943 Err(_) => {
2944 fields.push(Value::Null);
2946 }
2947 }
2948 }
2949 None => fields.push(Value::Null),
2950 }
2951 }
2952
2953 let mut children: BTreeMap<String, Vec<Node>> = BTreeMap::new();
2955 for (child_key, child_value) in obj {
2956 if !context.should_continue() {
2957 break;
2958 }
2959
2960 if let JsonValue::Array(child_arr) = child_value {
2961 if is_object_array(child_arr) {
2962 let child_location = row_location.child(child_key);
2963 if let Ok(child_list) = partial_json_array_to_matrix_list(
2964 child_arr,
2965 child_key,
2966 config,
2967 structs,
2968 schema_cache,
2969 depth + 1,
2970 &child_location,
2971 context,
2972 ) {
2973 children.insert(child_key.clone(), child_list.rows);
2974 } else {
2975 }
2977 }
2978 }
2979 }
2980
2981 let node = Node {
2982 type_name: type_name.clone(),
2983 id,
2984 fields: fields.into(),
2985 children: if children.is_empty() {
2986 None
2987 } else {
2988 Some(Box::new(children))
2989 },
2990 child_count: 0,
2991 };
2992
2993 rows.push(node);
2994 } else {
2995 let err = JsonConversionError::InvalidRoot("Expected object in array".to_string());
2997 context.record_error(err, row_location, false);
2998
2999 if context.config.tolerance == ErrorTolerance::SkipInvalidItems {
3001 continue;
3002 }
3003 }
3004 }
3005
3006 let count_hint = Some(rows.len());
3007
3008 Ok(MatrixList {
3009 type_name,
3010 schema,
3011 rows,
3012 count_hint,
3013 })
3014}
3015
3016fn partial_json_to_value(
3018 value: &JsonValue,
3019 config: &FromJsonConfig,
3020 location: &ErrorLocation,
3021 context: &mut ErrorContext,
3022) -> Result<Value, JsonConversionError> {
3023 match value {
3024 JsonValue::Null => Ok(Value::Null),
3025 JsonValue::Bool(b) => Ok(Value::Bool(*b)),
3026 JsonValue::Number(n) => match json_number_to_value(n) {
3027 Ok(value) => Ok(value),
3028 Err(err) => {
3029 context.record_error(err.clone(), location.clone(), false);
3030 Err(err)
3031 }
3032 },
3033 JsonValue::String(s) => {
3034 if let Some(max_len) = config.max_string_length {
3036 if s.len() > max_len {
3037 let err = JsonConversionError::MaxStringLengthExceeded(max_len, s.len());
3038 context.record_error(err.clone(), location.clone(), false);
3039 return Err(err);
3040 }
3041 }
3042
3043 if s.starts_with("$(") && s.ends_with(')') {
3045 match parse_expression_token(s) {
3046 Ok(expr) => Ok(Value::Expression(Box::new(expr))),
3047 Err(e) => {
3048 let err = JsonConversionError::InvalidExpression(e.to_string());
3049 context.record_error(err.clone(), location.clone(), false);
3050 Err(err)
3051 }
3052 }
3053 } else {
3054 Ok(Value::String(s.clone().into_boxed_str()))
3055 }
3056 }
3057 JsonValue::Array(arr) => {
3058 if let Some(max_size) = config.max_array_size {
3060 if arr.len() > max_size {
3061 let err = JsonConversionError::MaxArraySizeExceeded(max_size, arr.len());
3062 context.record_error(err.clone(), location.clone(), false);
3063 return Err(err);
3064 }
3065 }
3066
3067 if is_object_array(arr) {
3068 Ok(Value::Null) } else if is_tensor_array(arr) {
3070 match partial_json_array_to_tensor(arr, config, 0, location, context) {
3071 Ok(tensor) => Ok(Value::Tensor(Box::new(tensor))),
3072 Err(err) => Err(err),
3073 }
3074 } else if arr.is_empty() {
3075 Ok(Value::Tensor(Box::new(Tensor::Array(vec![]))))
3076 } else {
3077 match partial_json_array_to_tensor(arr, config, 0, location, context) {
3078 Ok(tensor) => Ok(Value::Tensor(Box::new(tensor))),
3079 Err(err) => Err(err),
3080 }
3081 }
3082 }
3083 JsonValue::Object(obj) => {
3084 if let Some(JsonValue::String(r)) = obj.get("@ref") {
3085 match parse_reference(r) {
3086 Ok(reference) => Ok(Value::Reference(reference)),
3087 Err(e) => {
3088 let err = JsonConversionError::InvalidReference(e);
3089 context.record_error(err.clone(), location.clone(), false);
3090 Err(err)
3091 }
3092 }
3093 } else {
3094 let err = JsonConversionError::NestedObject;
3095 context.record_error(err.clone(), location.clone(), false);
3096 Err(err)
3097 }
3098 }
3099 }
3100}