Skip to main content

qubit_json/
lenient_json_decoder.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026.
4 *    Haixing Hu, Qubit Co. Ltd.
5 *
6 *    All rights reserved.
7 *
8 ******************************************************************************/
9//! Defines the [`LenientJsonDecoder`] type and its public decoding methods.
10//!
11//! Author: Haixing Hu
12
13use serde::de::DeserializeOwned;
14use serde_json::{Value, error::Category};
15
16use crate::{JsonDecodeError, JsonDecodeOptions, JsonTopLevelKind, LenientJsonNormalizer};
17
18/// A configurable JSON decoder for non-fully-trusted text inputs.
19///
20/// `LenientJsonDecoder` applies a small set of predictable normalization rules
21/// before delegating actual parsing and deserialization to `serde_json`.
22///
23/// The decoder itself is stateless aside from its immutable configuration, so a
24/// single instance can be reused across many decoding calls.
25#[derive(Debug, Clone, Default)]
26pub struct LenientJsonDecoder {
27    /// Stores the immutable normalization and decoding configuration used by
28    /// this decoder instance.
29    normalizer: LenientJsonNormalizer,
30}
31
32impl LenientJsonDecoder {
33    /// Creates a decoder with the exact normalization rules in `options`.
34    ///
35    /// Reusing a decoder instance is recommended when multiple inputs should
36    /// follow the same lenient decoding policy.
37    #[must_use]
38    pub const fn new(options: JsonDecodeOptions) -> Self {
39        Self {
40            normalizer: LenientJsonNormalizer::new(options),
41        }
42    }
43
44    /// Returns the immutable options used by this decoder.
45    ///
46    /// This accessor allows callers to inspect the effective configuration
47    /// without cloning the decoder or duplicating the options elsewhere.
48    #[must_use]
49    pub const fn options(&self) -> &JsonDecodeOptions {
50        self.normalizer.options()
51    }
52
53    /// Decodes `input` into the target Rust type `T`.
54    ///
55    /// This method does not constrain the JSON top-level structure. Arrays,
56    /// objects, scalars, and any other JSON value kinds are all allowed as long
57    /// as they can be deserialized into `T`.
58    ///
59    /// The generic type `T` must implement [`DeserializeOwned`], because the
60    /// decoder first builds an owned [`Value`] and then deserializes from it.
61    ///
62    /// # Errors
63    ///
64    /// Returns [`JsonDecodeError`] when the input becomes empty after
65    /// normalization, when the normalized text is not valid JSON, or when the
66    /// parsed JSON value cannot be deserialized into `T`.
67    pub fn decode<T>(&self, input: &str) -> Result<T, JsonDecodeError>
68    where
69        T: DeserializeOwned,
70    {
71        let normalized = self.normalizer.normalize(input)?;
72        serde_json::from_str(normalized.as_ref())
73            .map_err(|error| Self::map_decode_error(error, normalized.len()))
74    }
75
76    /// Decodes `input` into a target type `T`, requiring a top-level JSON
77    /// object.
78    ///
79    /// This method is useful for APIs that require a structured object at the
80    /// top level and want an explicit error when an array or scalar is
81    /// received.
82    ///
83    /// # Errors
84    ///
85    /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
86    /// valid JSON value, when the top-level JSON kind is not an object, or
87    /// when the object cannot be deserialized into `T`.
88    pub fn decode_object<T>(&self, input: &str) -> Result<T, JsonDecodeError>
89    where
90        T: DeserializeOwned,
91    {
92        let normalized = self.normalizer.normalize(input)?;
93        self.ensure_top_level_from_text(normalized.as_ref(), JsonTopLevelKind::Object)?;
94        serde_json::from_str(normalized.as_ref())
95            .map_err(|error| Self::map_decode_error(error, normalized.len()))
96    }
97
98    /// Decodes `input` into a `Vec<T>`, requiring a top-level JSON array.
99    ///
100    /// This method should be preferred over [`Self::decode`] when the caller
101    /// wants an explicit top-level array contract instead of relying on the
102    /// target type alone.
103    ///
104    /// # Errors
105    ///
106    /// Returns [`JsonDecodeError`] when the input cannot be normalized into a
107    /// valid JSON value, when the top-level JSON kind is not an array, or when
108    /// the array cannot be deserialized into `Vec<T>`.
109    pub fn decode_array<T>(&self, input: &str) -> Result<Vec<T>, JsonDecodeError>
110    where
111        T: DeserializeOwned,
112    {
113        let normalized = self.normalizer.normalize(input)?;
114        self.ensure_top_level_from_text(normalized.as_ref(), JsonTopLevelKind::Array)?;
115        serde_json::from_str(normalized.as_ref())
116            .map_err(|error| Self::map_decode_error(error, normalized.len()))
117    }
118
119    /// Decodes `input` into a [`serde_json::Value`].
120    ///
121    /// This is the lowest-level public entry point. It exposes the normalized
122    /// and parsed JSON value before any additional type-specific
123    /// deserialization is attempted.
124    ///
125    /// # Errors
126    ///
127    /// Returns [`JsonDecodeError`] when the input is empty after normalization
128    /// or when the normalized text is not valid JSON syntax.
129    pub fn decode_value(&self, input: &str) -> Result<Value, JsonDecodeError> {
130        let normalized = self.normalizer.normalize(input)?;
131        serde_json::from_str(normalized.as_ref())
132            .map_err(|error| JsonDecodeError::invalid_json(error, Some(normalized.len())))
133    }
134
135    /// Verifies that the normalized text starts with the required top-level
136    /// JSON kind token, when such a token can be classified cheaply.
137    fn ensure_top_level_from_text(
138        &self,
139        normalized: &str,
140        expected: JsonTopLevelKind,
141    ) -> Result<(), JsonDecodeError> {
142        if let Some(actual) = Self::classify_top_level_from_text(normalized)
143            && actual != expected
144        {
145            return Err(JsonDecodeError::unexpected_top_level(expected, actual));
146        }
147        Ok(())
148    }
149
150    /// Classifies the top-level JSON kind from the first significant character.
151    ///
152    /// Returns `None` when the first non-whitespace character is missing or not
153    /// a valid JSON token start, in which case full parsing should handle the
154    /// error mapping.
155    fn classify_top_level_from_text(input: &str) -> Option<JsonTopLevelKind> {
156        let first = input.chars().find(|ch| !ch.is_whitespace())?;
157        match first {
158            '{' => Some(JsonTopLevelKind::Object),
159            '[' => Some(JsonTopLevelKind::Array),
160            '"' | '-' | '0'..='9' | 't' | 'f' | 'n' => Some(JsonTopLevelKind::Other),
161            _ => None,
162        }
163    }
164
165    /// Maps one `serde_json` error from direct typed decoding to the crate
166    /// error model.
167    ///
168    /// Syntax, EOF, and I/O categories are treated as invalid JSON input.
169    /// Data category errors are treated as type deserialization failures.
170    fn map_decode_error(error: serde_json::Error, input_bytes: usize) -> JsonDecodeError {
171        match error.classify() {
172            Category::Data => JsonDecodeError::deserialize(error, Some(input_bytes)),
173            Category::Io | Category::Syntax | Category::Eof => {
174                JsonDecodeError::invalid_json(error, Some(input_bytes))
175            }
176        }
177    }
178}