qubit_json/lenient_json_decoder.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Defines the [`LenientJsonDecoder`] type and its public decoding methods.
11//!
12
13use serde::de::DeserializeOwned;
14use serde_json::{Value, error::Category};
15
16use crate::{
17 JsonDecodeError, JsonDecodeOptions, JsonTopLevelKind,
18 lenient_json_normalizer::LenientJsonNormalizer,
19};
20
21/// A configurable JSON decoder for non-fully-trusted text inputs.
22///
23/// `LenientJsonDecoder` applies a small set of predictable normalization rules
24/// before delegating actual parsing and deserialization to `serde_json`.
25///
26/// The decoder itself is stateless aside from its immutable configuration, so a
27/// single instance can be reused across many decoding calls.
28#[derive(Debug, Clone, Default)]
29pub struct LenientJsonDecoder {
30 /// Stores the immutable normalization and decoding configuration used by
31 /// this decoder instance.
32 normalizer: LenientJsonNormalizer,
33}
34
35impl LenientJsonDecoder {
36 /// Creates a decoder with the exact normalization rules in `options`.
37 ///
38 /// Reusing a decoder instance is recommended when multiple inputs should
39 /// follow the same lenient decoding policy.
40 #[must_use]
41 pub const fn new(options: JsonDecodeOptions) -> Self {
42 Self {
43 normalizer: LenientJsonNormalizer::new(options),
44 }
45 }
46
47 /// Returns the immutable options used by this decoder.
48 ///
49 /// This accessor allows callers to inspect the effective configuration
50 /// without cloning the decoder or duplicating the options elsewhere.
51 #[must_use]
52 pub const fn options(&self) -> &JsonDecodeOptions {
53 self.normalizer.options()
54 }
55
56 /// Decodes `input` into the target Rust type `T`.
57 ///
58 /// This method does not constrain the JSON top-level structure. Arrays,
59 /// objects, scalars, and any other JSON value kinds are all allowed as long
60 /// as they can be deserialized into `T`.
61 ///
62 /// The generic type `T` must implement [`DeserializeOwned`] because this
63 /// method deserializes directly from normalized text and does not return
64 /// values borrowing from the input.
65 ///
66 /// # Errors
67 ///
68 /// Returns [`JsonDecodeError`] when the input becomes empty after
69 /// normalization, when the normalized text is not valid JSON, or when the
70 /// parsed JSON value cannot be deserialized into `T`.
71 ///
72 /// # Examples
73 ///
74 /// ```rust
75 /// use qubit_json::LenientJsonDecoder;
76 ///
77 /// let decoder = LenientJsonDecoder::default();
78 /// let value: u64 = decoder
79 /// .decode("42")
80 /// .expect("a numeric JSON scalar should decode into u64");
81 ///
82 /// assert_eq!(value, 42);
83 /// ```
84 pub fn decode<T>(&self, input: &str) -> Result<T, JsonDecodeError>
85 where
86 T: DeserializeOwned,
87 {
88 let normalized = self.normalizer.normalize(input)?;
89 Self::deserialize_normalized(normalized.as_ref(), normalized.len())
90 }
91
92 /// Decodes `input` into a target type `T`, requiring a top-level JSON
93 /// object.
94 ///
95 /// This method is useful for APIs that require a structured object at the
96 /// top level and want an explicit error when an array or scalar is
97 /// received.
98 ///
99 /// # Errors
100 ///
101 /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
102 /// valid JSON value, when the top-level JSON kind is not an object, or
103 /// when the object cannot be deserialized into `T`.
104 ///
105 /// # Examples
106 ///
107 /// ```rust
108 /// use qubit_json::LenientJsonDecoder;
109 ///
110 /// let decoder = LenientJsonDecoder::default();
111 /// let value: serde_json::Value = decoder
112 /// .decode_object("```json\n{\"ok\":true}\n```")
113 /// .expect("a fenced JSON object should decode into a value");
114 ///
115 /// assert_eq!(value["ok"], true);
116 /// ```
117 pub fn decode_object<T>(&self, input: &str) -> Result<T, JsonDecodeError>
118 where
119 T: DeserializeOwned,
120 {
121 self.decode_with_top_level(input, JsonTopLevelKind::Object)
122 }
123
124 /// Decodes `input` into a `Vec<T>`, requiring a top-level JSON array.
125 ///
126 /// This method should be preferred over [`Self::decode`] when the caller
127 /// wants an explicit top-level array contract instead of relying on the
128 /// target type alone.
129 ///
130 /// # Errors
131 ///
132 /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
133 /// valid JSON value, when the top-level JSON kind is not an array, or when
134 /// the array cannot be deserialized into `Vec<T>`.
135 ///
136 /// # Examples
137 ///
138 /// ```rust
139 /// use qubit_json::{JsonDecodeErrorKind, LenientJsonDecoder};
140 ///
141 /// let decoder = LenientJsonDecoder::default();
142 /// let error = decoder
143 /// .decode_array::<serde_json::Value>("{\"ok\":true}")
144 /// .expect_err("a top-level object should fail an array contract");
145 ///
146 /// assert_eq!(error.kind, JsonDecodeErrorKind::UnexpectedTopLevel);
147 /// ```
148 pub fn decode_array<T>(&self, input: &str) -> Result<Vec<T>, JsonDecodeError>
149 where
150 T: DeserializeOwned,
151 {
152 self.decode_with_top_level(input, JsonTopLevelKind::Array)
153 }
154
155 /// Decodes `input` into a [`serde_json::Value`].
156 ///
157 /// This is the lowest-level public entry point. It exposes the normalized
158 /// and parsed JSON value before any additional type-specific
159 /// deserialization is attempted.
160 ///
161 /// # Errors
162 ///
163 /// Returns [`JsonDecodeError`] when the input is empty after normalization
164 /// or when the normalized text is not valid JSON syntax.
165 ///
166 /// # Examples
167 ///
168 /// ```rust
169 /// use qubit_json::{JsonDecodeOptions, LenientJsonDecoder};
170 ///
171 /// let decoder = LenientJsonDecoder::new(JsonDecodeOptions {
172 /// max_input_bytes: Some(16),
173 /// ..JsonDecodeOptions::default()
174 /// });
175 /// let value = decoder
176 /// .decode_value("{\"ok\":true}")
177 /// .expect("input within the size limit should decode");
178 ///
179 /// assert_eq!(value["ok"], true);
180 /// ```
181 pub fn decode_value(&self, input: &str) -> Result<Value, JsonDecodeError> {
182 let (value, _) = self.parse_input_as_value(input)?;
183 Ok(value)
184 }
185
186 /// Normalizes input text and parses it as a JSON value.
187 fn parse_input_as_value(&self, input: &str) -> Result<(Value, usize), JsonDecodeError> {
188 let normalized = self.normalizer.normalize(input)?;
189 let input_bytes = normalized.len();
190 let value = Self::parse_value(normalized.as_ref())?;
191 Ok((value, input_bytes))
192 }
193
194 /// Decodes input after enforcing a required top-level JSON kind.
195 fn decode_with_top_level<T>(
196 &self,
197 input: &str,
198 expected: JsonTopLevelKind,
199 ) -> Result<T, JsonDecodeError>
200 where
201 T: DeserializeOwned,
202 {
203 let (value, input_bytes) = self.parse_input_as_value(input)?;
204 Self::ensure_top_level_from_value(&value, expected)?;
205 Self::deserialize_value(value, input_bytes)
206 }
207
208 /// Parses normalized text into a JSON value.
209 ///
210 /// Syntax failures are mapped to the crate error model with normalized
211 /// input byte length included for diagnostics.
212 fn parse_value(normalized: &str) -> Result<Value, JsonDecodeError> {
213 serde_json::from_str(normalized)
214 .map_err(|error| JsonDecodeError::invalid_json(error, Some(normalized.len())))
215 }
216
217 /// Verifies that a parsed JSON value has the required top-level kind.
218 fn ensure_top_level_from_value(
219 value: &Value,
220 expected: JsonTopLevelKind,
221 ) -> Result<(), JsonDecodeError> {
222 let actual = JsonTopLevelKind::of(value);
223 if actual != expected {
224 return Err(JsonDecodeError::unexpected_top_level(expected, actual));
225 }
226 Ok(())
227 }
228
229 /// Deserializes normalized JSON text into the target type.
230 fn deserialize_normalized<T>(normalized: &str, input_bytes: usize) -> Result<T, JsonDecodeError>
231 where
232 T: DeserializeOwned,
233 {
234 serde_json::from_str(normalized).map_err(|error| Self::map_decode_error(error, input_bytes))
235 }
236
237 /// Deserializes a parsed JSON value into the target type.
238 fn deserialize_value<T>(value: Value, input_bytes: usize) -> Result<T, JsonDecodeError>
239 where
240 T: DeserializeOwned,
241 {
242 serde_json::from_value(value)
243 .map_err(|error| JsonDecodeError::deserialize(error, Some(input_bytes)))
244 }
245
246 /// Maps one `serde_json` error from direct typed decoding to the crate
247 /// error model.
248 ///
249 /// Syntax, EOF, and I/O categories are treated as invalid JSON input.
250 /// Data category errors are treated as type deserialization failures.
251 fn map_decode_error(error: serde_json::Error, input_bytes: usize) -> JsonDecodeError {
252 match error.classify() {
253 Category::Data => JsonDecodeError::deserialize(error, Some(input_bytes)),
254 Category::Io | Category::Syntax | Category::Eof => {
255 JsonDecodeError::invalid_json(error, Some(input_bytes))
256 }
257 }
258 }
259}