qubit_json/lenient_json_decoder.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026.
4 * Haixing Hu, Qubit Co. Ltd.
5 *
6 * All rights reserved.
7 *
8 ******************************************************************************/
9//! Defines the [`LenientJsonDecoder`] type and its public decoding methods.
10//!
11//! Author: Haixing Hu
12
13use serde::de::DeserializeOwned;
14use serde_json::{Value, error::Category};
15
16use crate::{JsonDecodeError, JsonDecodeOptions, JsonTopLevelKind, LenientJsonNormalizer};
17
18/// A configurable JSON decoder for non-fully-trusted text inputs.
19///
20/// `LenientJsonDecoder` applies a small set of predictable normalization rules
21/// before delegating actual parsing and deserialization to `serde_json`.
22///
23/// The decoder itself is stateless aside from its immutable configuration, so a
24/// single instance can be reused across many decoding calls.
25#[derive(Debug, Clone, Default)]
26pub struct LenientJsonDecoder {
27 /// Stores the immutable normalization and decoding configuration used by
28 /// this decoder instance.
29 normalizer: LenientJsonNormalizer,
30}
31
32impl LenientJsonDecoder {
33 /// Creates a decoder with the exact normalization rules in `options`.
34 ///
35 /// Reusing a decoder instance is recommended when multiple inputs should
36 /// follow the same lenient decoding policy.
37 #[must_use]
38 pub const fn new(options: JsonDecodeOptions) -> Self {
39 Self {
40 normalizer: LenientJsonNormalizer::new(options),
41 }
42 }
43
44 /// Returns the immutable options used by this decoder.
45 ///
46 /// This accessor allows callers to inspect the effective configuration
47 /// without cloning the decoder or duplicating the options elsewhere.
48 #[must_use]
49 pub const fn options(&self) -> &JsonDecodeOptions {
50 self.normalizer.options()
51 }
52
53 /// Decodes `input` into the target Rust type `T`.
54 ///
55 /// This method does not constrain the JSON top-level structure. Arrays,
56 /// objects, scalars, and any other JSON value kinds are all allowed as long
57 /// as they can be deserialized into `T`.
58 ///
59 /// The generic type `T` must implement [`DeserializeOwned`], because the
60 /// decoder first builds an owned [`Value`] and then deserializes from it.
61 ///
62 /// # Errors
63 ///
64 /// Returns [`JsonDecodeError`] when the input becomes empty after
65 /// normalization, when the normalized text is not valid JSON, or when the
66 /// parsed JSON value cannot be deserialized into `T`.
67 pub fn decode<T>(&self, input: &str) -> Result<T, JsonDecodeError>
68 where
69 T: DeserializeOwned,
70 {
71 let normalized = self.normalizer.normalize(input)?;
72 serde_json::from_str(normalized.as_ref())
73 .map_err(|error| Self::map_decode_error(error, normalized.len()))
74 }
75
76 /// Decodes `input` into a target type `T`, requiring a top-level JSON
77 /// object.
78 ///
79 /// This method is useful for APIs that require a structured object at the
80 /// top level and want an explicit error when an array or scalar is
81 /// received.
82 ///
83 /// # Errors
84 ///
85 /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
86 /// valid JSON value, when the top-level JSON kind is not an object, or
87 /// when the object cannot be deserialized into `T`.
88 pub fn decode_object<T>(&self, input: &str) -> Result<T, JsonDecodeError>
89 where
90 T: DeserializeOwned,
91 {
92 let normalized = self.normalizer.normalize(input)?;
93 self.ensure_top_level_from_text(normalized.as_ref(), JsonTopLevelKind::Object)?;
94 serde_json::from_str(normalized.as_ref())
95 .map_err(|error| Self::map_decode_error(error, normalized.len()))
96 }
97
98 /// Decodes `input` into a `Vec<T>`, requiring a top-level JSON array.
99 ///
100 /// This method should be preferred over [`Self::decode`] when the caller
101 /// wants an explicit top-level array contract instead of relying on the
102 /// target type alone.
103 ///
104 /// # Errors
105 ///
106 /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
107 /// valid JSON value, when the top-level JSON kind is not an array, or when
108 /// the array cannot be deserialized into `Vec<T>`.
109 pub fn decode_array<T>(&self, input: &str) -> Result<Vec<T>, JsonDecodeError>
110 where
111 T: DeserializeOwned,
112 {
113 let normalized = self.normalizer.normalize(input)?;
114 self.ensure_top_level_from_text(normalized.as_ref(), JsonTopLevelKind::Array)?;
115 serde_json::from_str(normalized.as_ref())
116 .map_err(|error| Self::map_decode_error(error, normalized.len()))
117 }
118
119 /// Decodes `input` into a [`serde_json::Value`].
120 ///
121 /// This is the lowest-level public entry point. It exposes the normalized
122 /// and parsed JSON value before any additional type-specific
123 /// deserialization is attempted.
124 ///
125 /// # Errors
126 ///
127 /// Returns [`JsonDecodeError`] when the input is empty after normalization
128 /// or when the normalized text is not valid JSON syntax.
129 pub fn decode_value(&self, input: &str) -> Result<Value, JsonDecodeError> {
130 let normalized = self.normalizer.normalize(input)?;
131 serde_json::from_str(normalized.as_ref())
132 .map_err(|error| JsonDecodeError::invalid_json(error, Some(normalized.len())))
133 }
134
135 /// Verifies that the normalized text starts with the required top-level
136 /// JSON kind token, when such a token can be classified cheaply.
137 fn ensure_top_level_from_text(
138 &self,
139 normalized: &str,
140 expected: JsonTopLevelKind,
141 ) -> Result<(), JsonDecodeError> {
142 if let Some(actual) = Self::classify_top_level_from_text(normalized)
143 && actual != expected
144 {
145 return Err(JsonDecodeError::unexpected_top_level(expected, actual));
146 }
147 Ok(())
148 }
149
150 /// Classifies the top-level JSON kind from the first significant character.
151 ///
152 /// Returns `None` when the first non-whitespace character is missing or not
153 /// a valid JSON token start, in which case full parsing should handle the
154 /// error mapping.
155 fn classify_top_level_from_text(input: &str) -> Option<JsonTopLevelKind> {
156 let first = input.chars().find(|ch| !ch.is_whitespace())?;
157 match first {
158 '{' => Some(JsonTopLevelKind::Object),
159 '[' => Some(JsonTopLevelKind::Array),
160 '"' | '-' | '0'..='9' | 't' | 'f' | 'n' => Some(JsonTopLevelKind::Other),
161 _ => None,
162 }
163 }
164
165 /// Maps one `serde_json` error from direct typed decoding to the crate
166 /// error model.
167 ///
168 /// Syntax, EOF, and I/O categories are treated as invalid JSON input.
169 /// Data category errors are treated as type deserialization failures.
170 fn map_decode_error(error: serde_json::Error, input_bytes: usize) -> JsonDecodeError {
171 match error.classify() {
172 Category::Data => JsonDecodeError::deserialize(error, Some(input_bytes)),
173 Category::Io | Category::Syntax | Category::Eof => {
174 JsonDecodeError::invalid_json(error, Some(input_bytes))
175 }
176 }
177 }
178}