lnmp_sanitize/
sanitize.rs1use std::borrow::Cow;
2
3use crate::mode::SanitizationLevel;
4
5#[derive(Debug, Clone)]
7pub struct SanitizationConfig {
8 pub level: SanitizationLevel,
10 pub auto_quote_strings: bool,
12 pub auto_escape_quotes: bool,
14 pub normalize_booleans: bool,
16 pub normalize_numbers: bool,
18}
19
20impl Default for SanitizationConfig {
21 fn default() -> Self {
22 Self {
23 level: SanitizationLevel::Normal,
24 auto_quote_strings: true,
25 auto_escape_quotes: true,
26 normalize_booleans: true,
27 normalize_numbers: false,
28 }
29 }
30}
31
32pub fn sanitize_lnmp_text<'a>(input: &'a str, config: &SanitizationConfig) -> Cow<'a, str> {
35 let mut changed = false;
36
37 let pass1 = structural_cleanup(input, config, &mut changed);
39
40 let pass2 = if config.level == SanitizationLevel::Minimal {
42 pass1
43 } else {
44 let quote_fixed = quote_and_escape_repair(&pass1, config, &mut changed);
45 if config.auto_quote_strings {
46 Cow::Owned(auto_quote_unquoted_values(
47 quote_fixed.as_ref(),
48 &mut changed,
49 ))
50 } else {
51 quote_fixed
52 }
53 };
54
55 let pass3 = if config.level == SanitizationLevel::Aggressive
57 && (config.normalize_booleans || config.normalize_numbers)
58 {
59 Cow::Owned(normalize_tokens(&pass2, config, &mut changed))
60 } else {
61 pass2
62 };
63
64 if changed {
65 Cow::Owned(pass3.into_owned())
66 } else {
67 Cow::Borrowed(input)
68 }
69}
70
71fn structural_cleanup<'a>(
72 input: &'a str,
73 config: &SanitizationConfig,
74 changed: &mut bool,
75) -> Cow<'a, str> {
76 if config.level == SanitizationLevel::Minimal {
78 let mut output = String::with_capacity(input.len());
79 for line in input.lines() {
80 let trimmed = line.trim_end_matches([' ', '\t']);
81 if trimmed.len() != line.len() {
82 *changed = true;
83 }
84 output.push_str(trimmed);
85 output.push('\n');
86 }
87 if !input.ends_with('\n') && !input.is_empty() {
88 output.pop();
89 }
90
91 if *changed {
92 return Cow::Owned(output);
93 }
94 return Cow::Borrowed(input);
95 }
96
97 let mut output = String::with_capacity(input.len());
98 let mut in_quotes = false;
99 let mut escape_next = false;
100 let mut last_emitted: Option<char> = None;
101
102 let mut chars = input.chars().peekable();
103 while let Some(ch) = chars.next() {
104 if escape_next {
105 output.push(ch);
106 last_emitted = Some(ch);
107 escape_next = false;
108 continue;
109 }
110
111 match ch {
112 '\\' => {
113 output.push('\\');
114 match chars.peek() {
115 Some('"' | '\\' | 'n' | 'r' | 't') => {
116 escape_next = true;
117 }
118 Some(_) if in_quotes && config.auto_escape_quotes => {
119 escape_next = true;
120 *changed = true;
121 }
122 None => {
123 output.push('\\');
124 *changed = true;
125 }
126 _ => {}
127 }
128 last_emitted = Some('\\');
129 }
130 '"' => {
131 in_quotes = !in_quotes;
132 output.push('"');
133 last_emitted = Some('"');
134 }
135 ';' if !in_quotes => {
136 output.push(';');
137 last_emitted = Some(';');
138 while matches!(chars.peek(), Some(c) if c.is_whitespace()) {
139 chars.next();
140 *changed = true;
141 }
142 }
143 ',' if !in_quotes => {
144 output.push(',');
145 last_emitted = Some(',');
146 while matches!(chars.peek(), Some(c) if c.is_whitespace()) {
147 chars.next();
148 *changed = true;
149 }
150 }
151 '\n' => {
152 while output.ends_with(' ') || output.ends_with('\t') {
153 output.pop();
154 *changed = true;
155 }
156 output.push('\n');
157 last_emitted = Some('\n');
158 }
159 '\r' => {
160 *changed = true;
161 output.push('\n');
162 last_emitted = Some('\n');
163 }
164 ' ' | '\t' if !in_quotes => {
165 let next_non_space = {
166 let mut clone = chars.clone();
167 clone.find(|c| *c != ' ' && *c != '\t')
168 };
169
170 let prev_is_boundary = matches!(
171 last_emitted,
172 None | Some('\n' | ';' | ',' | '=' | '[' | '{')
173 );
174 let next_is_boundary = matches!(
175 next_non_space,
176 None | Some('\n' | ';' | ',' | '=' | ']' | '}')
177 );
178
179 if prev_is_boundary || next_is_boundary {
180 *changed = true;
181 continue;
182 }
183
184 if last_emitted == Some(' ') {
185 *changed = true;
186 continue;
187 }
188
189 output.push(' ');
190 last_emitted = Some(' ');
191 }
192 other => {
193 output.push(other);
194 last_emitted = Some(other);
195 }
196 }
197 }
198
199 if in_quotes && config.auto_escape_quotes {
200 output.push('"');
201 *changed = true;
202 }
203
204 if *changed {
205 Cow::Owned(output)
206 } else {
207 Cow::Borrowed(input)
208 }
209}
210
211fn quote_and_escape_repair<'a>(
212 input: &'a str,
213 config: &SanitizationConfig,
214 changed: &mut bool,
215) -> Cow<'a, str> {
216 let mut output = String::with_capacity(input.len());
217 let mut in_quotes = false;
218 let mut escape_next = false;
219
220 for ch in input.chars() {
221 if escape_next {
222 output.push(ch);
223 escape_next = false;
224 continue;
225 }
226
227 match ch {
228 '\\' => {
229 output.push('\\');
230 escape_next = true;
231 }
232 '"' => {
233 in_quotes = !in_quotes;
234 output.push('"');
235 }
236 _ => {
237 output.push(ch);
238 }
239 }
240 }
241
242 if in_quotes && config.auto_escape_quotes {
243 output.push('"');
244 *changed = true;
245 }
246
247 if *changed {
248 Cow::Owned(output)
249 } else {
250 Cow::Borrowed(input)
251 }
252}
253
/// Wraps unquoted values that need it in double quotes.
///
/// Every `=` starts a value that runs up to the next `;` or `\n` that is
/// neither inside double quotes nor preceded by a backslash (or to the end of
/// the input). A value is quoted when, after trimming surrounding whitespace,
/// it is non-empty, does not already start with `"`, `[` or `{`, and contains
/// a double quote or any whitespace. Quoting trims the value, escapes `"` and
/// `\` with a backslash, and sets `changed` to `true`. All other text is
/// copied through unchanged.
fn auto_quote_unquoted_values(input: &str, changed: &mut bool) -> String {
    let mut result = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(eq_pos) = rest.find('=') {
        // Copy everything up to and including the '=' verbatim.
        let (head, tail) = rest.split_at(eq_pos + 1);
        result.push_str(head);

        // Measure the value: stop at the first unquoted, unescaped delimiter.
        let mut value_len = tail.len();
        let mut quoted = false;
        let mut skip_one = false;
        for (offset, c) in tail.char_indices() {
            if skip_one {
                skip_one = false;
                continue;
            }
            match c {
                '\\' => skip_one = true,
                '"' => quoted = !quoted,
                ';' | '\n' if !quoted => {
                    value_len = offset;
                    break;
                }
                _ => {}
            }
        }

        let (raw, remainder) = tail.split_at(value_len);
        let core = raw.trim();

        let already_quoted = core.starts_with('"');
        let is_structural = core.starts_with('[') || core.starts_with('{');
        let has_trigger = core.contains('"') || core.contains(char::is_whitespace);

        if !core.is_empty() && !already_quoted && !is_structural && has_trigger {
            result.push('"');
            for c in core.chars() {
                if c == '"' || c == '\\' {
                    result.push('\\');
                }
                result.push(c);
            }
            result.push('"');
            *changed = true;
        } else {
            // Values that are left alone keep their surrounding whitespace.
            result.push_str(raw);
        }

        // The delimiter (if any) is the first character of `remainder`; it is
        // copied through as part of the next iteration's head or the tail.
        rest = remainder;
    }

    result.push_str(rest);
    result
}
334
335fn normalize_tokens(input: &str, config: &SanitizationConfig, changed: &mut bool) -> String {
336 let mut out = String::with_capacity(input.len());
337 let mut token = String::new();
338 let mut in_quotes = false;
339 let mut escape_next = false;
340
341 for ch in input.chars() {
342 if escape_next {
343 out.push(ch);
344 escape_next = false;
345 continue;
346 }
347
348 if ch == '\\' && in_quotes {
349 out.push('\\');
350 escape_next = true;
351 continue;
352 }
353
354 if ch == '"' {
355 flush_token(&mut token, &mut out, config, changed);
356 in_quotes = !in_quotes;
357 out.push('"');
358 continue;
359 }
360
361 if in_quotes {
362 out.push(ch);
363 continue;
364 }
365
366 if ch.is_ascii_alphanumeric() || ch == '-' {
367 token.push(ch);
368 } else {
369 flush_token(&mut token, &mut out, config, changed);
370 out.push(ch);
371 }
372 }
373
374 flush_token(&mut token, &mut out, config, changed);
375 out
376}
377
378fn flush_token(
379 token: &mut String,
380 out: &mut String,
381 config: &SanitizationConfig,
382 changed: &mut bool,
383) {
384 if token.is_empty() {
385 return;
386 }
387
388 let mut replacement: Option<String> = None;
389
390 if config.normalize_booleans {
391 match token.to_ascii_lowercase().as_str() {
392 "true" | "yes" => replacement = Some("1".to_string()),
393 "false" | "no" => replacement = Some("0".to_string()),
394 _ => {}
395 }
396 }
397
398 if replacement.is_none()
399 && config.normalize_numbers
400 && token.len() > 1
401 && token.chars().all(|c| c.is_ascii_digit())
402 && token.starts_with('0')
403 {
404 let trimmed = token.trim_start_matches('0');
405 let normalized = if trimmed.is_empty() { "0" } else { trimmed };
406 replacement = Some(normalized.to_string());
407 }
408
409 if let Some(ref value) = replacement {
410 *changed |= value != token;
411 out.push_str(value);
412 } else {
413 out.push_str(token);
414 }
415
416 token.clear();
417}