1use super::array_conversion::{is_object_array, is_tensor_array};
21use super::config::{
22 is_integer_overflow, json_number_to_value, FromJsonConfig, JsonConversionError, SchemaCache,
23};
24use crate::DEFAULT_SCHEMA;
25use hedl_core::convert::parse_reference;
26use hedl_core::lex::{parse_expression_token, singularize_and_capitalize, Tensor};
27use hedl_core::{Document, Item, MatrixList, Node, Value};
28use serde_json::{Map, Value as JsonValue};
29use std::collections::BTreeMap;
30
/// Policy deciding when partial parsing stops after an error has been recorded.
///
/// The policy is evaluated each time an error is recorded; once it asks to
/// stop, no further errors are recorded and the conversion loops bail out.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ErrorTolerance {
    /// Stop as soon as the first error is recorded. This is the default.
    #[default]
    StopOnFirst,

    /// Stop once the number of recorded errors reaches the given limit.
    MaxErrors(usize),

    /// Never stop because of an error; keep recording every error encountered.
    CollectAll,

    /// Keep going after non-fatal errors and stop only when a fatal error is recorded.
    SkipInvalidItems,
}
47
/// Position inside the JSON input at which an error was recorded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ErrorLocation {
    /// Path from the root, written as `$`, `$.key` and `$.key[3]` segments.
    pub path: String,

    /// Number of key/index steps taken from the root (the root itself is 0).
    pub depth: usize,
}

impl ErrorLocation {
    /// Location of the top-level JSON value: path `$`, depth 0.
    fn root() -> Self {
        let path = String::from("$");
        Self { path, depth: 0 }
    }

    /// Location of the object member `key` directly below this location.
    fn child(&self, key: &str) -> Self {
        self.one_level_down(format!("{}.{}", self.path, key))
    }

    /// Location of the array element at position `idx` directly below this location.
    fn index(&self, idx: usize) -> Self {
        self.one_level_down(format!("{}[{}]", self.path, idx))
    }

    /// Builds the location for an already-formatted `path` one level deeper than `self`.
    fn one_level_down(&self, path: String) -> Self {
        let depth = self.depth + 1;
        Self { path, depth }
    }
}
80
81#[derive(Debug, Clone)]
83pub struct ParseError {
84 pub error: JsonConversionError,
86
87 pub location: ErrorLocation,
89
90 pub is_fatal: bool,
92}
93
94impl ParseError {
95 fn new(error: JsonConversionError, location: ErrorLocation, is_fatal: bool) -> Self {
96 Self {
97 error,
98 location,
99 is_fatal,
100 }
101 }
102}
103
104#[derive(Debug, Clone, Default)]
106pub struct PartialConfig {
107 pub from_json_config: FromJsonConfig,
109
110 pub tolerance: ErrorTolerance,
112
113 pub include_partial_on_fatal: bool,
115
116 pub replace_invalid_with_null: bool,
118}
119
120impl PartialConfig {
121 #[must_use]
123 pub fn builder() -> PartialConfigBuilder {
124 PartialConfigBuilder::default()
125 }
126}
127
128#[derive(Debug, Clone, Default)]
130pub struct PartialConfigBuilder {
131 from_json_config: FromJsonConfig,
132 tolerance: ErrorTolerance,
133 include_partial_on_fatal: bool,
134 replace_invalid_with_null: bool,
135}
136
137impl PartialConfigBuilder {
138 #[must_use]
140 pub fn from_json_config(mut self, config: FromJsonConfig) -> Self {
141 self.from_json_config = config;
142 self
143 }
144
145 #[must_use]
147 pub fn tolerance(mut self, tolerance: ErrorTolerance) -> Self {
148 self.tolerance = tolerance;
149 self
150 }
151
152 #[must_use]
154 pub fn include_partial_on_fatal(mut self, value: bool) -> Self {
155 self.include_partial_on_fatal = value;
156 self
157 }
158
159 #[must_use]
161 pub fn replace_invalid_with_null(mut self, value: bool) -> Self {
162 self.replace_invalid_with_null = value;
163 self
164 }
165
166 #[must_use]
168 pub fn build(self) -> PartialConfig {
169 PartialConfig {
170 from_json_config: self.from_json_config,
171 tolerance: self.tolerance,
172 include_partial_on_fatal: self.include_partial_on_fatal,
173 replace_invalid_with_null: self.replace_invalid_with_null,
174 }
175 }
176}
177
178#[derive(Debug)]
180pub struct PartialResult {
181 pub document: Option<Document>,
183
184 pub errors: Vec<ParseError>,
186
187 pub stopped_early: bool,
189}
190
191impl PartialResult {
192 #[must_use]
194 pub fn is_complete(&self) -> bool {
195 self.errors.is_empty() && self.document.is_some()
196 }
197
198 #[must_use]
200 pub fn is_failed(&self) -> bool {
201 self.errors.iter().any(|e| e.is_fatal) || self.document.is_none()
202 }
203
204 pub fn into_result(self) -> Result<Document, Vec<ParseError>> {
206 if self.errors.is_empty() {
207 self.document.ok_or_else(Vec::new)
208 } else {
209 Err(self.errors)
210 }
211 }
212}
213
214struct ErrorContext {
216 errors: Vec<ParseError>,
217 config: PartialConfig,
218 stopped: bool,
219}
220
221impl ErrorContext {
222 fn new(config: PartialConfig) -> Self {
223 Self {
224 errors: Vec::new(),
225 config,
226 stopped: false,
227 }
228 }
229
230 fn record_error(
232 &mut self,
233 error: JsonConversionError,
234 location: ErrorLocation,
235 is_fatal: bool,
236 ) -> bool {
237 if self.stopped {
238 return false;
239 }
240
241 let parse_error = ParseError::new(error, location, is_fatal);
242 self.errors.push(parse_error);
243
244 let should_stop = match self.config.tolerance {
246 ErrorTolerance::StopOnFirst => true,
247 ErrorTolerance::MaxErrors(max) => self.errors.len() >= max,
248 ErrorTolerance::CollectAll => false,
249 ErrorTolerance::SkipInvalidItems => is_fatal,
250 };
251
252 if should_stop {
253 self.stopped = true;
254 }
255
256 !should_stop
257 }
258
259 fn should_continue(&self) -> bool {
260 !self.stopped
261 }
262}
263
/// Borrowed working state threaded through the recursive conversion functions.
struct PartialParseState<'a> {
    /// Conversion settings (size, depth and length limits).
    config: &'a FromJsonConfig,
    /// Struct definitions collected so far: type name mapped to its schema columns.
    structs: &'a mut BTreeMap<String, Vec<String>>,
    /// Cache of schemas already inferred, looked up by the sorted key list of an object.
    schema_cache: &'a mut SchemaCache,
    /// Error accumulator that also decides when processing stops.
    context: &'a mut ErrorContext,
}
278
279#[must_use]
299pub fn partial_parse_json(json: &str, config: &PartialConfig) -> PartialResult {
300 let value = match serde_json::from_str::<JsonValue>(json) {
302 Ok(v) => v,
303 Err(e) => {
304 return PartialResult {
306 document: None,
307 errors: vec![ParseError::new(
308 JsonConversionError::ParseError(e.to_string()),
309 ErrorLocation::root(),
310 true,
311 )],
312 stopped_early: false,
313 };
314 }
315 };
316
317 partial_parse_json_value(&value, config)
318}
319
320#[must_use]
322pub fn partial_parse_json_value(value: &JsonValue, config: &PartialConfig) -> PartialResult {
323 let mut context = ErrorContext::new(config.clone());
324 let mut structs = BTreeMap::new();
325 let mut schema_cache = SchemaCache::new();
326
327 let root = if let JsonValue::Object(map) = value {
329 let mut state = PartialParseState {
330 config: &config.from_json_config,
331 structs: &mut structs,
332 schema_cache: &mut schema_cache,
333 context: &mut context,
334 };
335 match partial_json_object_to_root(map, &mut state, 0, &ErrorLocation::root()) {
336 Ok(root) => Some(root),
337 Err(_) => {
338 if config.include_partial_on_fatal {
339 Some(BTreeMap::new())
340 } else {
341 None
342 }
343 }
344 }
345 } else {
346 context.record_error(
347 JsonConversionError::InvalidRoot(format!("{value:?}")),
348 ErrorLocation::root(),
349 true,
350 );
351 None
352 };
353
354 let document = root.map(|root| Document {
355 version: config.from_json_config.version,
356 schema_versions: BTreeMap::new(),
357 aliases: BTreeMap::new(),
358 structs,
359 nests: BTreeMap::new(),
360 root,
361 });
362
363 PartialResult {
364 document,
365 errors: context.errors,
366 stopped_early: context.stopped,
367 }
368}
369
370fn partial_json_object_to_root(
372 map: &Map<String, JsonValue>,
373 state: &mut PartialParseState<'_>,
374 depth: usize,
375 location: &ErrorLocation,
376) -> Result<BTreeMap<String, Item>, JsonConversionError> {
377 if let Some(max_size) = state.config.max_object_size {
379 if map.len() > max_size {
380 let err = JsonConversionError::MaxObjectSizeExceeded(max_size, map.len());
381 state
382 .context
383 .record_error(err.clone(), location.clone(), false);
384 return Err(err);
385 }
386 }
387
388 let mut result = BTreeMap::new();
389
390 for (key, value) in map {
391 if !state.context.should_continue() {
392 break;
393 }
394
395 if key.starts_with("__") {
397 continue;
398 }
399
400 let item_location = location.child(key);
401 match partial_json_value_to_item(value, key, state, depth, &item_location) {
402 Ok(item) => {
403 result.insert(key.clone(), item);
404 }
405 Err(_) => {
406 if state.context.config.replace_invalid_with_null {
408 result.insert(key.clone(), Item::Scalar(Value::Null));
409 }
410 }
412 }
413 }
414
415 Ok(result)
416}
417
418fn partial_json_value_to_item(
420 value: &JsonValue,
421 key: &str,
422 state: &mut PartialParseState<'_>,
423 depth: usize,
424 location: &ErrorLocation,
425) -> Result<Item, JsonConversionError> {
426 if let Some(max_depth) = state.config.max_depth {
428 if depth >= max_depth {
429 let err = JsonConversionError::MaxDepthExceeded(max_depth);
430 state
431 .context
432 .record_error(err.clone(), location.clone(), false);
433 return Err(err);
434 }
435 }
436
437 match value {
438 JsonValue::Null => Ok(Item::Scalar(Value::Null)),
439 JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(*b))),
440 JsonValue::Number(n) => match json_number_to_value(n) {
441 Ok(value) => Ok(Item::Scalar(value)),
442 Err(err) => {
443 state
444 .context
445 .record_error(err.clone(), location.clone(), false);
446 Err(err)
447 }
448 },
449 JsonValue::String(s) => {
450 if let Some(max_len) = state.config.max_string_length {
452 if s.len() > max_len {
453 let err = JsonConversionError::MaxStringLengthExceeded(max_len, s.len());
454 state
455 .context
456 .record_error(err.clone(), location.clone(), false);
457 return Err(err);
458 }
459 }
460
461 if s.starts_with("$(") && s.ends_with(')') {
463 match parse_expression_token(s) {
464 Ok(expr) => Ok(Item::Scalar(Value::Expression(Box::new(expr)))),
465 Err(e) => {
466 let err = JsonConversionError::InvalidExpression(e.to_string());
467 state
468 .context
469 .record_error(err.clone(), location.clone(), false);
470 Err(err)
471 }
472 }
473 } else {
474 Ok(Item::Scalar(Value::String(s.clone().into_boxed_str())))
475 }
476 }
477 JsonValue::Array(arr) => {
478 if let Some(max_size) = state.config.max_array_size {
480 if arr.len() > max_size {
481 let err = JsonConversionError::MaxArraySizeExceeded(max_size, arr.len());
482 state
483 .context
484 .record_error(err.clone(), location.clone(), false);
485 return Err(err);
486 }
487 }
488
489 if arr.is_empty() {
491 let type_name = singularize_and_capitalize(key);
492 let schema: Vec<String> = DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect();
493 let mut list = MatrixList::new(type_name.clone(), schema.clone());
494 list.count_hint = Some(0);
495 state.structs.insert(type_name, schema);
496 Ok(Item::List(list))
497 } else if is_tensor_array(arr) {
498 match partial_json_array_to_tensor(
499 arr,
500 state.config,
501 depth + 1,
502 location,
503 state.context,
504 ) {
505 Ok(tensor) => Ok(Item::Scalar(Value::Tensor(Box::new(tensor)))),
506 Err(err) => Err(err),
507 }
508 } else if is_object_array(arr) {
509 match partial_json_array_to_matrix_list(arr, key, state, depth + 1, location) {
510 Ok(list) => Ok(Item::List(list)),
511 Err(err) => Err(err),
512 }
513 } else {
514 Ok(Item::Scalar(Value::String(
519 serde_json::to_string(&JsonValue::Array(arr.to_vec()))
520 .unwrap_or_else(|_| "[]".to_string())
521 .into_boxed_str(),
522 )))
523 }
524 }
525 JsonValue::Object(obj) => {
526 if let Some(JsonValue::String(r)) = obj.get("@ref") {
528 match parse_reference(r) {
529 Ok(reference) => Ok(Item::Scalar(Value::Reference(reference))),
530 Err(e) => {
531 let err = JsonConversionError::InvalidReference(e);
532 state
533 .context
534 .record_error(err.clone(), location.clone(), false);
535 Err(err)
536 }
537 }
538 } else {
539 match partial_json_object_to_item_map(obj, state, depth + 1, location) {
541 Ok(item_map) => Ok(Item::Object(item_map)),
542 Err(err) => Err(err),
543 }
544 }
545 }
546 }
547}
548
549fn partial_json_object_to_item_map(
551 map: &Map<String, JsonValue>,
552 state: &mut PartialParseState<'_>,
553 depth: usize,
554 location: &ErrorLocation,
555) -> Result<BTreeMap<String, Item>, JsonConversionError> {
556 if let Some(max_size) = state.config.max_object_size {
558 if map.len() > max_size {
559 let err = JsonConversionError::MaxObjectSizeExceeded(max_size, map.len());
560 state
561 .context
562 .record_error(err.clone(), location.clone(), false);
563 return Err(err);
564 }
565 }
566
567 let mut result = BTreeMap::new();
568
569 for (key, value) in map {
570 if !state.context.should_continue() {
571 break;
572 }
573
574 if key.starts_with("__") {
575 continue;
576 }
577
578 let item_location = location.child(key);
579 match partial_json_value_to_item(value, key, state, depth, &item_location) {
580 Ok(item) => {
581 result.insert(key.clone(), item);
582 }
583 Err(_) => {
584 if state.context.config.replace_invalid_with_null {
585 result.insert(key.clone(), Item::Scalar(Value::Null));
586 }
587 }
588 }
589 }
590
591 Ok(result)
592}
593
594fn partial_json_array_to_tensor(
596 arr: &[JsonValue],
597 config: &FromJsonConfig,
598 depth: usize,
599 location: &ErrorLocation,
600 context: &mut ErrorContext,
601) -> Result<Tensor, JsonConversionError> {
602 if let Some(max_depth) = config.max_depth {
604 if depth >= max_depth {
605 let err = JsonConversionError::MaxDepthExceeded(max_depth);
606 context.record_error(err.clone(), location.clone(), false);
607 return Err(err);
608 }
609 }
610
611 let mut items = Vec::with_capacity(arr.len());
612
613 for (idx, v) in arr.iter().enumerate() {
614 if !context.should_continue() {
615 break;
616 }
617
618 let elem_location = location.index(idx);
619 let tensor = match v {
620 JsonValue::Number(n) => {
621 if is_integer_overflow(n) {
624 }
627
628 if let Some(f) = n.as_f64() {
629 Ok(Tensor::Scalar(f))
630 } else {
631 let err = JsonConversionError::InvalidNumber(n.to_string());
632 context.record_error(err.clone(), elem_location, false);
633 Err(err)
634 }
635 }
636 JsonValue::Array(nested) => {
637 partial_json_array_to_tensor(nested, config, depth + 1, &elem_location, context)
638 }
639 _ => {
640 let err = JsonConversionError::InvalidTensor;
641 context.record_error(err.clone(), elem_location, false);
642 Err(err)
643 }
644 };
645
646 match tensor {
647 Ok(t) => items.push(t),
648 Err(_) => {
649 if context.config.replace_invalid_with_null {
650 items.push(Tensor::Scalar(0.0));
651 }
652 }
654 }
655 }
656
657 Ok(Tensor::Array(items))
658}
659
660fn partial_json_array_to_matrix_list(
662 arr: &[JsonValue],
663 key: &str,
664 state: &mut PartialParseState<'_>,
665 depth: usize,
666 location: &ErrorLocation,
667) -> Result<MatrixList, JsonConversionError> {
668 if let Some(max_depth) = state.config.max_depth {
670 if depth >= max_depth {
671 let err = JsonConversionError::MaxDepthExceeded(max_depth);
672 state
673 .context
674 .record_error(err.clone(), location.clone(), false);
675 return Err(err);
676 }
677 }
678
679 let type_name = singularize_and_capitalize(key);
680
681 let schema: Vec<String> = if let Some(JsonValue::Object(first)) = arr.first() {
683 if let Some(JsonValue::Array(schema_arr)) = first.get("__hedl_schema") {
684 schema_arr
685 .iter()
686 .filter_map(|v| v.as_str().map(String::from))
687 .collect()
688 } else {
689 let mut cache_key: Vec<String> = first
690 .keys()
691 .filter(|k| {
692 if k.starts_with("__") {
693 return false;
694 }
695 if let Some(JsonValue::Array(arr)) = first.get(*k) {
696 !is_object_array(arr)
697 } else {
698 true
699 }
700 })
701 .cloned()
702 .collect();
703 cache_key.sort();
704
705 if let Some(cached_schema) = state.schema_cache.get(&cache_key) {
706 cached_schema.clone()
707 } else {
708 let mut keys = cache_key.clone();
709 if let Some(pos) = keys.iter().position(|k| k == "id") {
710 keys.remove(pos);
711 keys.insert(0, "id".to_string());
712 }
713 state.schema_cache.insert(cache_key, keys.clone());
714 keys
715 }
716 }
717 } else {
718 DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
719 };
720
721 let schema = if schema.is_empty() {
722 DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
723 } else {
724 schema
725 };
726
727 state.structs.insert(type_name.clone(), schema.clone());
728
729 let mut rows = Vec::with_capacity(arr.len());
730
731 for (idx, item) in arr.iter().enumerate() {
732 if !state.context.should_continue() {
733 break;
734 }
735
736 let row_location = location.index(idx);
737
738 if let JsonValue::Object(obj) = item {
739 let id = obj
740 .get(&schema[0])
741 .and_then(|v| v.as_str())
742 .unwrap_or("")
743 .to_string();
744
745 let mut fields = Vec::with_capacity(schema.len());
746 for col in &schema {
747 match obj.get(col) {
748 Some(v) => {
749 match partial_json_to_value(
750 v,
751 state.config,
752 &row_location.child(col),
753 state.context,
754 ) {
755 Ok(value) => fields.push(value),
756 Err(_) => {
757 fields.push(Value::Null);
759 }
760 }
761 }
762 None => fields.push(Value::Null),
763 }
764 }
765
766 let mut children: BTreeMap<String, Vec<Node>> = BTreeMap::new();
768 for (child_key, child_value) in obj {
769 if !state.context.should_continue() {
770 break;
771 }
772
773 if let JsonValue::Array(child_arr) = child_value {
774 if is_object_array(child_arr) {
775 let child_location = row_location.child(child_key);
776 if let Ok(child_list) = partial_json_array_to_matrix_list(
777 child_arr,
778 child_key,
779 state,
780 depth + 1,
781 &child_location,
782 ) {
783 children.insert(child_key.clone(), child_list.rows);
784 } else {
785 }
787 }
788 }
789 }
790
791 let node = Node {
792 type_name: type_name.clone(),
793 id,
794 fields: fields.into(),
795 children: if children.is_empty() {
796 None
797 } else {
798 Some(Box::new(children))
799 },
800 child_count: 0,
801 };
802
803 rows.push(node);
804 } else {
805 let err = JsonConversionError::InvalidRoot("Expected object in array".to_string());
807 state.context.record_error(err, row_location, false);
808
809 if state.context.config.tolerance == ErrorTolerance::SkipInvalidItems {
811 continue;
812 }
813 }
814 }
815
816 let count_hint = Some(rows.len());
817
818 Ok(MatrixList {
819 type_name,
820 schema,
821 rows,
822 count_hint,
823 })
824}
825
826fn partial_json_to_value(
828 value: &JsonValue,
829 config: &FromJsonConfig,
830 location: &ErrorLocation,
831 context: &mut ErrorContext,
832) -> Result<Value, JsonConversionError> {
833 match value {
834 JsonValue::Null => Ok(Value::Null),
835 JsonValue::Bool(b) => Ok(Value::Bool(*b)),
836 JsonValue::Number(n) => match json_number_to_value(n) {
837 Ok(value) => Ok(value),
838 Err(err) => {
839 context.record_error(err.clone(), location.clone(), false);
840 Err(err)
841 }
842 },
843 JsonValue::String(s) => {
844 if let Some(max_len) = config.max_string_length {
846 if s.len() > max_len {
847 let err = JsonConversionError::MaxStringLengthExceeded(max_len, s.len());
848 context.record_error(err.clone(), location.clone(), false);
849 return Err(err);
850 }
851 }
852
853 if s.starts_with("$(") && s.ends_with(')') {
855 match parse_expression_token(s) {
856 Ok(expr) => Ok(Value::Expression(Box::new(expr))),
857 Err(e) => {
858 let err = JsonConversionError::InvalidExpression(e.to_string());
859 context.record_error(err.clone(), location.clone(), false);
860 Err(err)
861 }
862 }
863 } else {
864 Ok(Value::String(s.clone().into_boxed_str()))
865 }
866 }
867 JsonValue::Array(arr) => {
868 if let Some(max_size) = config.max_array_size {
870 if arr.len() > max_size {
871 let err = JsonConversionError::MaxArraySizeExceeded(max_size, arr.len());
872 context.record_error(err.clone(), location.clone(), false);
873 return Err(err);
874 }
875 }
876
877 if is_object_array(arr) {
878 Ok(Value::Null) } else if is_tensor_array(arr) {
880 match partial_json_array_to_tensor(arr, config, 0, location, context) {
881 Ok(tensor) => Ok(Value::Tensor(Box::new(tensor))),
882 Err(err) => Err(err),
883 }
884 } else if arr.is_empty() {
885 Ok(Value::Tensor(Box::new(Tensor::Array(vec![]))))
886 } else {
887 Ok(Value::String(
890 serde_json::to_string(value)
891 .unwrap_or_else(|_| "[]".to_string())
892 .into_boxed_str(),
893 ))
894 }
895 }
896 JsonValue::Object(obj) => {
897 if let Some(JsonValue::String(r)) = obj.get("@ref") {
898 match parse_reference(r) {
899 Ok(reference) => Ok(Value::Reference(reference)),
900 Err(e) => {
901 let err = JsonConversionError::InvalidReference(e);
902 context.record_error(err.clone(), location.clone(), false);
903 Err(err)
904 }
905 }
906 } else {
907 let err = JsonConversionError::NestedObject;
908 context.record_error(err.clone(), location.clone(), false);
909 Err(err)
910 }
911 }
912 }
913}