qubit_json/
lenient_json_normalizer.rs1use std::borrow::Cow;
14
15use crate::{JsonDecodeError, JsonDecodeOptions};
16
17#[derive(Debug, Clone, Copy)]
22pub struct LenientJsonNormalizer {
23 options: JsonDecodeOptions,
25}
26
27impl Default for LenientJsonNormalizer {
28 fn default() -> Self {
29 Self::new(JsonDecodeOptions::default())
30 }
31}
32
33impl LenientJsonNormalizer {
34 #[must_use]
39 pub const fn new(options: JsonDecodeOptions) -> Self {
40 Self { options }
41 }
42
43 #[must_use]
45 pub const fn options(&self) -> &JsonDecodeOptions {
46 &self.options
47 }
48
49 pub fn normalize<'a>(&self, input: &'a str) -> Result<Cow<'a, str>, JsonDecodeError> {
55 let input = self.require_non_empty(input)?;
56 let input = self.trim_if_enabled(input);
57 let input = self.strip_utf8_bom(input);
58 let input = self.trim_if_enabled(input);
59 let input = self.strip_markdown_code_fence(input);
60 let input = self.trim_if_enabled(input);
61 let input = self.escape_control_chars_in_json_strings(input);
62 let input = self.trim_cow_if_enabled(input);
63
64 if input.is_empty() {
65 Err(JsonDecodeError::empty_input())
66 } else {
67 Ok(input)
68 }
69 }
70
71 fn require_non_empty<'a>(&self, input: &'a str) -> Result<&'a str, JsonDecodeError> {
76 if self.options.trim_whitespace {
77 if input.trim().is_empty() {
78 Err(JsonDecodeError::empty_input())
79 } else {
80 Ok(input)
81 }
82 } else if input.is_empty() {
83 Err(JsonDecodeError::empty_input())
84 } else {
85 Ok(input)
86 }
87 }
88
89 fn trim_if_enabled<'a>(&self, input: &'a str) -> &'a str {
93 if self.options.trim_whitespace {
94 input.trim()
95 } else {
96 input
97 }
98 }
99
100 fn trim_cow_if_enabled<'a>(&self, input: Cow<'a, str>) -> Cow<'a, str> {
105 if !self.options.trim_whitespace {
106 return input;
107 }
108 match input {
109 Cow::Borrowed(text) => Cow::Borrowed(text.trim()),
110 Cow::Owned(text) => {
111 let trimmed = text.trim();
112 if trimmed.len() == text.len() {
113 Cow::Owned(text)
114 } else {
115 Cow::Owned(trimmed.to_string())
116 }
117 }
118 }
119 }
120
121 fn strip_utf8_bom<'a>(&self, input: &'a str) -> &'a str {
125 if self.options.strip_utf8_bom {
126 input.strip_prefix('\u{feff}').unwrap_or(input)
127 } else {
128 input
129 }
130 }
131
132 fn strip_markdown_code_fence<'a>(&self, input: &'a str) -> &'a str {
138 if !self.options.strip_markdown_code_fence || !input.starts_with("```") {
139 return input;
140 }
141
142 let Some(line_end) = input.find('\n') else {
143 return input;
144 };
145 let content = &input[line_end + 1..];
146 let trimmed_end = content.trim_end_matches(char::is_whitespace);
147
148 if let Some(without_close) = trimmed_end.strip_suffix("```") {
149 without_close
150 } else {
151 content
152 }
153 }
154
155 fn escape_control_chars_in_json_strings<'a>(&self, input: &'a str) -> Cow<'a, str> {
160 if !self.options.escape_control_chars_in_strings {
161 return Cow::Borrowed(input);
162 }
163
164 let mut in_string = false;
165 let mut in_escape = false;
166 let mut output: Option<String> = None;
167
168 for (index, ch) in input.char_indices() {
169 let mut replacement = None;
170
171 if in_string {
172 if in_escape {
173 in_escape = false;
174 } else if ch == '\\' {
175 in_escape = true;
176 } else if ch == '"' {
177 in_string = false;
178 } else if ('\u{0000}'..='\u{001f}').contains(&ch) {
179 replacement = Some(self.escaped_control_char(ch));
180 }
181 } else if ch == '"' {
182 in_string = true;
183 }
184
185 if let Some(escaped) = replacement {
186 let text = output.get_or_insert_with(|| {
187 let mut text = String::with_capacity(input.len() + 8);
188 text.push_str(&input[..index]);
189 text
190 });
191 text.push_str(escaped);
192 continue;
193 }
194
195 if let Some(text) = output.as_mut() {
196 text.push(ch);
197 }
198 }
199
200 match output {
201 Some(text) => Cow::Owned(text),
202 None => Cow::Borrowed(input),
203 }
204 }
205
206 fn escaped_control_char(&self, ch: char) -> &'static str {
210 match ch {
211 '\u{0008}' => "\\b",
212 '\u{0009}' => "\\t",
213 '\u{000a}' => "\\n",
214 '\u{000c}' => "\\f",
215 '\u{000d}' => "\\r",
216 '\u{0000}' => "\\u0000",
217 '\u{0001}' => "\\u0001",
218 '\u{0002}' => "\\u0002",
219 '\u{0003}' => "\\u0003",
220 '\u{0004}' => "\\u0004",
221 '\u{0005}' => "\\u0005",
222 '\u{0006}' => "\\u0006",
223 '\u{0007}' => "\\u0007",
224 '\u{000b}' => "\\u000b",
225 '\u{000e}' => "\\u000e",
226 '\u{000f}' => "\\u000f",
227 '\u{0010}' => "\\u0010",
228 '\u{0011}' => "\\u0011",
229 '\u{0012}' => "\\u0012",
230 '\u{0013}' => "\\u0013",
231 '\u{0014}' => "\\u0014",
232 '\u{0015}' => "\\u0015",
233 '\u{0016}' => "\\u0016",
234 '\u{0017}' => "\\u0017",
235 '\u{0018}' => "\\u0018",
236 '\u{0019}' => "\\u0019",
237 '\u{001a}' => "\\u001a",
238 '\u{001b}' => "\\u001b",
239 '\u{001c}' => "\\u001c",
240 '\u{001d}' => "\\u001d",
241 '\u{001e}' => "\\u001e",
242 '\u{001f}' => "\\u001f",
243 _ => unreachable!("escaped_control_char only supports ASCII control chars"),
244 }
245 }
246}