Skip to main content

qubit_json/
lenient_json_decoder.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Defines the [`LenientJsonDecoder`] type and its public decoding methods.
11//!
12
13use serde::de::DeserializeOwned;
14use serde_json::{Value, error::Category};
15
16use crate::{
17    JsonDecodeError, JsonDecodeOptions, JsonTopLevelKind,
18    lenient_json_normalizer::LenientJsonNormalizer,
19};
20
21/// A configurable JSON decoder for non-fully-trusted text inputs.
22///
23/// `LenientJsonDecoder` applies a small set of predictable normalization rules
24/// before delegating actual parsing and deserialization to `serde_json`.
25///
26/// The decoder itself is stateless aside from its immutable configuration, so a
27/// single instance can be reused across many decoding calls.
28#[derive(Debug, Clone, Default)]
29pub struct LenientJsonDecoder {
30    /// Stores the immutable normalization and decoding configuration used by
31    /// this decoder instance.
32    normalizer: LenientJsonNormalizer,
33}
34
35impl LenientJsonDecoder {
36    /// Creates a decoder with the exact normalization rules in `options`.
37    ///
38    /// Reusing a decoder instance is recommended when multiple inputs should
39    /// follow the same lenient decoding policy.
40    #[must_use]
41    pub const fn new(options: JsonDecodeOptions) -> Self {
42        Self {
43            normalizer: LenientJsonNormalizer::new(options),
44        }
45    }
46
47    /// Returns the immutable options used by this decoder.
48    ///
49    /// This accessor allows callers to inspect the effective configuration
50    /// without cloning the decoder or duplicating the options elsewhere.
51    #[must_use]
52    pub const fn options(&self) -> &JsonDecodeOptions {
53        self.normalizer.options()
54    }
55
56    /// Decodes `input` into the target Rust type `T`.
57    ///
58    /// This method does not constrain the JSON top-level structure. Arrays,
59    /// objects, scalars, and any other JSON value kinds are all allowed as long
60    /// as they can be deserialized into `T`.
61    ///
62    /// The generic type `T` must implement [`DeserializeOwned`] because this
63    /// method deserializes directly from normalized text and does not return
64    /// values borrowing from the input.
65    ///
66    /// # Errors
67    ///
68    /// Returns [`JsonDecodeError`] when the input becomes empty after
69    /// normalization, when the normalized text is not valid JSON, or when the
70    /// parsed JSON value cannot be deserialized into `T`.
71    ///
72    /// # Examples
73    ///
74    /// ```rust
75    /// use qubit_json::LenientJsonDecoder;
76    ///
77    /// let decoder = LenientJsonDecoder::default();
78    /// let value: u64 = decoder
79    ///     .decode("42")
80    ///     .expect("a numeric JSON scalar should decode into u64");
81    ///
82    /// assert_eq!(value, 42);
83    /// ```
84    pub fn decode<T>(&self, input: &str) -> Result<T, JsonDecodeError>
85    where
86        T: DeserializeOwned,
87    {
88        let normalized = self.normalizer.normalize(input)?;
89        Self::deserialize_normalized(normalized.as_ref(), normalized.len())
90    }
91
92    /// Decodes `input` into a target type `T`, requiring a top-level JSON
93    /// object.
94    ///
95    /// This method is useful for APIs that require a structured object at the
96    /// top level and want an explicit error when an array or scalar is
97    /// received.
98    ///
99    /// # Errors
100    ///
101    /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
102    /// valid JSON value, when the top-level JSON kind is not an object, or
103    /// when the object cannot be deserialized into `T`.
104    ///
105    /// # Examples
106    ///
107    /// ```rust
108    /// use qubit_json::LenientJsonDecoder;
109    ///
110    /// let decoder = LenientJsonDecoder::default();
111    /// let value: serde_json::Value = decoder
112    ///     .decode_object("```json\n{\"ok\":true}\n```")
113    ///     .expect("a fenced JSON object should decode into a value");
114    ///
115    /// assert_eq!(value["ok"], true);
116    /// ```
117    pub fn decode_object<T>(&self, input: &str) -> Result<T, JsonDecodeError>
118    where
119        T: DeserializeOwned,
120    {
121        self.decode_with_top_level(input, JsonTopLevelKind::Object)
122    }
123
124    /// Decodes `input` into a `Vec<T>`, requiring a top-level JSON array.
125    ///
126    /// This method should be preferred over [`Self::decode`] when the caller
127    /// wants an explicit top-level array contract instead of relying on the
128    /// target type alone.
129    ///
130    /// # Errors
131    ///
132    /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
133    /// valid JSON value, when the top-level JSON kind is not an array, or when
134    /// the array cannot be deserialized into `Vec<T>`.
135    ///
136    /// # Examples
137    ///
138    /// ```rust
139    /// use qubit_json::{JsonDecodeErrorKind, LenientJsonDecoder};
140    ///
141    /// let decoder = LenientJsonDecoder::default();
142    /// let error = decoder
143    ///     .decode_array::<serde_json::Value>("{\"ok\":true}")
144    ///     .expect_err("a top-level object should fail an array contract");
145    ///
146    /// assert_eq!(error.kind, JsonDecodeErrorKind::UnexpectedTopLevel);
147    /// ```
148    pub fn decode_array<T>(&self, input: &str) -> Result<Vec<T>, JsonDecodeError>
149    where
150        T: DeserializeOwned,
151    {
152        self.decode_with_top_level(input, JsonTopLevelKind::Array)
153    }
154
155    /// Decodes `input` into a [`serde_json::Value`].
156    ///
157    /// This is the lowest-level public entry point. It exposes the normalized
158    /// and parsed JSON value before any additional type-specific
159    /// deserialization is attempted.
160    ///
161    /// # Errors
162    ///
163    /// Returns [`JsonDecodeError`] when the input is empty after normalization
164    /// or when the normalized text is not valid JSON syntax.
165    ///
166    /// # Examples
167    ///
168    /// ```rust
169    /// use qubit_json::{JsonDecodeOptions, LenientJsonDecoder};
170    ///
171    /// let decoder = LenientJsonDecoder::new(JsonDecodeOptions {
172    ///     max_input_bytes: Some(16),
173    ///     ..JsonDecodeOptions::default()
174    /// });
175    /// let value = decoder
176    ///     .decode_value("{\"ok\":true}")
177    ///     .expect("input within the size limit should decode");
178    ///
179    /// assert_eq!(value["ok"], true);
180    /// ```
181    pub fn decode_value(&self, input: &str) -> Result<Value, JsonDecodeError> {
182        let (value, _) = self.parse_input_as_value(input)?;
183        Ok(value)
184    }
185
186    /// Normalizes input text and parses it as a JSON value.
187    fn parse_input_as_value(&self, input: &str) -> Result<(Value, usize), JsonDecodeError> {
188        let normalized = self.normalizer.normalize(input)?;
189        let input_bytes = normalized.len();
190        let value = Self::parse_value(normalized.as_ref())?;
191        Ok((value, input_bytes))
192    }
193
194    /// Decodes input after enforcing a required top-level JSON kind.
195    fn decode_with_top_level<T>(
196        &self,
197        input: &str,
198        expected: JsonTopLevelKind,
199    ) -> Result<T, JsonDecodeError>
200    where
201        T: DeserializeOwned,
202    {
203        let (value, input_bytes) = self.parse_input_as_value(input)?;
204        Self::ensure_top_level_from_value(&value, expected)?;
205        Self::deserialize_value(value, input_bytes)
206    }
207
208    /// Parses normalized text into a JSON value.
209    ///
210    /// Syntax failures are mapped to the crate error model with normalized
211    /// input byte length included for diagnostics.
212    fn parse_value(normalized: &str) -> Result<Value, JsonDecodeError> {
213        serde_json::from_str(normalized)
214            .map_err(|error| JsonDecodeError::invalid_json(error, Some(normalized.len())))
215    }
216
217    /// Verifies that a parsed JSON value has the required top-level kind.
218    fn ensure_top_level_from_value(
219        value: &Value,
220        expected: JsonTopLevelKind,
221    ) -> Result<(), JsonDecodeError> {
222        let actual = JsonTopLevelKind::of(value);
223        if actual != expected {
224            return Err(JsonDecodeError::unexpected_top_level(expected, actual));
225        }
226        Ok(())
227    }
228
229    /// Deserializes normalized JSON text into the target type.
230    fn deserialize_normalized<T>(normalized: &str, input_bytes: usize) -> Result<T, JsonDecodeError>
231    where
232        T: DeserializeOwned,
233    {
234        serde_json::from_str(normalized).map_err(|error| Self::map_decode_error(error, input_bytes))
235    }
236
237    /// Deserializes a parsed JSON value into the target type.
238    fn deserialize_value<T>(value: Value, input_bytes: usize) -> Result<T, JsonDecodeError>
239    where
240        T: DeserializeOwned,
241    {
242        serde_json::from_value(value)
243            .map_err(|error| JsonDecodeError::deserialize(error, Some(input_bytes)))
244    }
245
246    /// Maps one `serde_json` error from direct typed decoding to the crate
247    /// error model.
248    ///
249    /// Syntax, EOF, and I/O categories are treated as invalid JSON input.
250    /// Data category errors are treated as type deserialization failures.
251    fn map_decode_error(error: serde_json::Error, input_bytes: usize) -> JsonDecodeError {
252        match error.classify() {
253            Category::Data => JsonDecodeError::deserialize(error, Some(input_bytes)),
254            Category::Io | Category::Syntax | Category::Eof => {
255                JsonDecodeError::invalid_json(error, Some(input_bytes))
256            }
257        }
258    }
259}