qubit_json/
lenient_json_normalizer.rs1use std::borrow::Cow;
14
15use crate::{JsonDecodeError, JsonDecodeOptions};
16
17#[derive(Debug, Clone, Copy)]
22pub struct LenientJsonNormalizer {
23 options: JsonDecodeOptions,
25}
26
27impl Default for LenientJsonNormalizer {
28 fn default() -> Self {
29 Self::new(JsonDecodeOptions::default())
30 }
31}
32
33impl LenientJsonNormalizer {
34 #[must_use]
39 pub const fn new(options: JsonDecodeOptions) -> Self {
40 Self { options }
41 }
42
43 #[must_use]
45 pub const fn options(&self) -> &JsonDecodeOptions {
46 &self.options
47 }
48
49 pub fn normalize<'a>(&self, input: &'a str) -> Result<Cow<'a, str>, JsonDecodeError> {
55 self.require_within_size_limit(input)?;
56 let input = self.require_non_empty(input)?;
57 let input = self.trim_if_enabled(input);
58 let input = self.strip_utf8_bom(input);
59 let input = self.trim_if_enabled(input);
60 let input = self.strip_markdown_code_fence(input);
61 let input = self.trim_if_enabled(input);
62 let input = self.escape_control_chars_in_json_strings(input);
63 let input = self.trim_cow_if_enabled(input);
64
65 if input.is_empty() {
66 Err(JsonDecodeError::empty_input())
67 } else {
68 Ok(input)
69 }
70 }
71
72 fn require_non_empty<'a>(&self, input: &'a str) -> Result<&'a str, JsonDecodeError> {
77 if self.options.trim_whitespace {
78 if input.trim().is_empty() {
79 Err(JsonDecodeError::empty_input())
80 } else {
81 Ok(input)
82 }
83 } else if input.is_empty() {
84 Err(JsonDecodeError::empty_input())
85 } else {
86 Ok(input)
87 }
88 }
89
90 fn require_within_size_limit(&self, input: &str) -> Result<(), JsonDecodeError> {
93 if let Some(limit) = self.options.max_input_bytes {
94 let size = input.len();
95 if size > limit {
96 return Err(JsonDecodeError::input_too_large(size, limit));
97 }
98 }
99 Ok(())
100 }
101
102 fn trim_if_enabled<'a>(&self, input: &'a str) -> &'a str {
106 if self.options.trim_whitespace {
107 input.trim()
108 } else {
109 input
110 }
111 }
112
113 fn trim_cow_if_enabled<'a>(&self, input: Cow<'a, str>) -> Cow<'a, str> {
118 if !self.options.trim_whitespace {
119 return input;
120 }
121 match input {
122 Cow::Borrowed(text) => Cow::Borrowed(text.trim()),
123 Cow::Owned(text) => {
124 let trimmed = text.trim();
125 if trimmed.len() == text.len() {
126 Cow::Owned(text)
127 } else {
128 Cow::Owned(trimmed.to_string())
129 }
130 }
131 }
132 }
133
134 fn strip_utf8_bom<'a>(&self, input: &'a str) -> &'a str {
138 if self.options.strip_utf8_bom {
139 input.strip_prefix('\u{feff}').unwrap_or(input)
140 } else {
141 input
142 }
143 }
144
145 fn strip_markdown_code_fence<'a>(&self, input: &'a str) -> &'a str {
151 if !self.options.strip_markdown_code_fence || !input.starts_with("```") {
152 return input;
153 }
154
155 let Some(line_end) = input.find('\n') else {
156 return input;
157 };
158 let opening_tag = input[3..line_end].trim();
159 if self.options.strip_markdown_code_fence_json_only
160 && !Self::is_json_code_fence_tag(opening_tag)
161 {
162 return input;
163 }
164
165 let content = &input[line_end + 1..];
166
167 if let Some(without_close) = Self::strip_markdown_closing_fence(content) {
168 return without_close;
169 }
170 if self.options.strip_markdown_code_fence_requires_closing {
171 input
172 } else {
173 content
174 }
175 }
176
177 fn is_json_code_fence_tag(tag: &str) -> bool {
179 tag.is_empty() || tag.eq_ignore_ascii_case("json") || tag.eq_ignore_ascii_case("jsonc")
180 }
181
182 fn strip_markdown_closing_fence(content: &str) -> Option<&str> {
187 let trimmed_end = content.trim_end_matches(char::is_whitespace);
188 let without_close = trimmed_end.strip_suffix("```")?;
189 if without_close.is_empty()
190 || without_close.ends_with('\n')
191 || without_close.ends_with('\r')
192 {
193 Some(without_close)
194 } else {
195 None
196 }
197 }
198
199 fn escape_control_chars_in_json_strings<'a>(&self, input: &'a str) -> Cow<'a, str> {
204 if !self.options.escape_control_chars_in_strings {
205 return Cow::Borrowed(input);
206 }
207
208 let mut in_string = false;
209 let mut in_escape = false;
210 let mut output: Option<String> = None;
211
212 for (index, ch) in input.char_indices() {
213 let mut replacement = None;
214
215 if in_string {
216 if in_escape {
217 in_escape = false;
218 } else if ch == '\\' {
219 in_escape = true;
220 } else if ch == '"' {
221 in_string = false;
222 } else if ('\u{0000}'..='\u{001f}').contains(&ch) {
223 replacement = Some(self.escaped_control_char(ch));
224 }
225 } else if ch == '"' {
226 in_string = true;
227 }
228
229 if let Some(escaped) = replacement {
230 let text = output.get_or_insert_with(|| {
231 let mut text = String::with_capacity(input.len() + 8);
232 text.push_str(&input[..index]);
233 text
234 });
235 text.push_str(escaped);
236 continue;
237 }
238
239 if let Some(text) = output.as_mut() {
240 text.push(ch);
241 }
242 }
243
244 match output {
245 Some(text) => Cow::Owned(text),
246 None => Cow::Borrowed(input),
247 }
248 }
249
250 fn escaped_control_char(&self, ch: char) -> &'static str {
254 match ch {
255 '\u{0008}' => "\\b",
256 '\u{0009}' => "\\t",
257 '\u{000a}' => "\\n",
258 '\u{000c}' => "\\f",
259 '\u{000d}' => "\\r",
260 '\u{0000}' => "\\u0000",
261 '\u{0001}' => "\\u0001",
262 '\u{0002}' => "\\u0002",
263 '\u{0003}' => "\\u0003",
264 '\u{0004}' => "\\u0004",
265 '\u{0005}' => "\\u0005",
266 '\u{0006}' => "\\u0006",
267 '\u{0007}' => "\\u0007",
268 '\u{000b}' => "\\u000b",
269 '\u{000e}' => "\\u000e",
270 '\u{000f}' => "\\u000f",
271 '\u{0010}' => "\\u0010",
272 '\u{0011}' => "\\u0011",
273 '\u{0012}' => "\\u0012",
274 '\u{0013}' => "\\u0013",
275 '\u{0014}' => "\\u0014",
276 '\u{0015}' => "\\u0015",
277 '\u{0016}' => "\\u0016",
278 '\u{0017}' => "\\u0017",
279 '\u{0018}' => "\\u0018",
280 '\u{0019}' => "\\u0019",
281 '\u{001a}' => "\\u001a",
282 '\u{001b}' => "\\u001b",
283 '\u{001c}' => "\\u001c",
284 '\u{001d}' => "\\u001d",
285 '\u{001e}' => "\\u001e",
286 '\u{001f}' => "\\u001f",
287 _ => unreachable!("escaped_control_char only supports ASCII control chars"),
288 }
289 }
290}