quillmark_core/normalize.rs
1//! # Input Normalization
2//!
3//! This module provides input normalization for markdown content before parsing.
4//! Normalization ensures that invisible control characters and other artifacts
5//! that can interfere with markdown parsing are handled consistently.
6//!
7//! ## Overview
8//!
9//! Input text may contain invisible Unicode characters (especially from copy-paste)
10//! that interfere with markdown parsing. This module provides functions to:
11//!
12//! - Strip Unicode bidirectional formatting characters that break delimiter recognition
13//! - Orchestrate guillemet preprocessing (`<<text>>` → `«text»`)
14//! - Apply all normalizations in the correct order
15//!
16//! ## Functions
17//!
//! - [`strip_bidi_formatting`] - Remove Unicode bidi control characters
//! - [`fix_html_comment_fences`] - Move text that follows `-->` on the same line onto its own line
//! - [`normalize_markdown`] - Apply all markdown-specific normalizations
//! - [`normalize_fields`] - Normalize document fields (bidi + guillemets)
21//!
22//! ## Why Normalize?
23//!
24//! Unicode bidirectional formatting characters (LRO, RLO, LRE, RLE, etc.) are invisible
25//! control characters used for bidirectional text layout. When placed adjacent to markdown
26//! delimiters like `**`, they can prevent parsers from recognizing the delimiters:
27//!
28//! ```text
29//! **bold** or <U+202D>**(1234**
30//! ^^^^^^^^ invisible LRO here prevents second ** from being recognized as bold
31//! ```
32//!
33//! These characters commonly appear when copying text from:
34//! - Web pages with mixed LTR/RTL content
35//! - PDF documents
36//! - Word processors
37//! - Some clipboard managers
38//!
39//! ## Examples
40//!
41//! ```
42//! use quillmark_core::normalize::strip_bidi_formatting;
43//!
44//! // Input with invisible U+202D (LRO) before second **
45//! let input = "**asdf** or \u{202D}**(1234**";
46//! let cleaned = strip_bidi_formatting(input);
47//! assert_eq!(cleaned, "**asdf** or **(1234**");
48//! ```
49
50use crate::guillemet::{preprocess_markdown_guillemets, strip_chevrons};
51use crate::parse::BODY_FIELD;
52use crate::value::QuillValue;
53use std::collections::HashMap;
54
/// Maximum nesting depth for JSON value normalization to prevent stack overflow.
///
/// The recursive walk starts at depth 0 for the top-level value and adds one
/// for every array or object it descends into; a value is rejected once its
/// depth is strictly greater than this limit.
const MAX_NESTING_DEPTH: usize = 100;
57
/// Errors that can occur during normalization.
#[derive(Debug, thiserror::Error)]
pub enum NormalizationError {
    /// A JSON value was nested more deeply than the normalizer allows.
    ///
    /// Raised by the recursive JSON walk as soon as it reaches a value whose
    /// depth is greater than the configured maximum.
    #[error("JSON nesting too deep: {depth} levels (max: {max} levels)")]
    NestingTooDeep {
        /// Depth at which the limit was exceeded
        depth: usize,
        /// Maximum allowed depth
        max: usize,
    },
}
70
/// Returns `true` when `c` is one of the Unicode bidirectional formatting
/// characters this module strips.
///
/// The set is expressed as three contiguous code-point ranges:
///
/// - U+200E..=U+200F: LRM, RLM (directional marks)
/// - U+202A..=U+202E: LRE, RLE, PDF, LRO, RLO (embeddings and overrides)
/// - U+2066..=U+2069: LRI, RLI, FSI, PDI (isolates)
#[inline]
fn is_bidi_char(c: char) -> bool {
    matches!(
        c,
        '\u{200E}'..='\u{200F}' | '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}'
    )
}
89
90/// Strips Unicode bidirectional formatting characters that can interfere with markdown parsing.
91///
92/// These invisible control characters are used for bidirectional text layout but can
93/// break markdown delimiter recognition when placed adjacent to `**`, `*`, `_`, etc.
94///
95/// # Characters Stripped
96///
97/// - U+200E (LEFT-TO-RIGHT MARK, LRM)
98/// - U+200F (RIGHT-TO-LEFT MARK, RLM)
99/// - U+202A (LEFT-TO-RIGHT EMBEDDING, LRE)
100/// - U+202B (RIGHT-TO-LEFT EMBEDDING, RLE)
101/// - U+202C (POP DIRECTIONAL FORMATTING, PDF)
102/// - U+202D (LEFT-TO-RIGHT OVERRIDE, LRO)
103/// - U+202E (RIGHT-TO-LEFT OVERRIDE, RLO)
104/// - U+2066 (LEFT-TO-RIGHT ISOLATE, LRI)
105/// - U+2067 (RIGHT-TO-LEFT ISOLATE, RLI)
106/// - U+2068 (FIRST STRONG ISOLATE, FSI)
107/// - U+2069 (POP DIRECTIONAL ISOLATE, PDI)
108///
109/// # Examples
110///
111/// ```
112/// use quillmark_core::normalize::strip_bidi_formatting;
113///
114/// // Normal text is unchanged
115/// assert_eq!(strip_bidi_formatting("hello"), "hello");
116///
117/// // LRO character is stripped
118/// assert_eq!(strip_bidi_formatting("he\u{202D}llo"), "hello");
119///
120/// // All bidi characters are stripped
121/// let input = "\u{200E}\u{200F}\u{202A}\u{202B}\u{202C}\u{202D}\u{202E}";
122/// assert_eq!(strip_bidi_formatting(input), "");
123/// ```
124pub fn strip_bidi_formatting(s: &str) -> String {
125 // Early return optimization: avoid allocation if no bidi characters present
126 if !s.chars().any(is_bidi_char) {
127 return s.to_string();
128 }
129
130 s.chars().filter(|c| !is_bidi_char(*c)).collect()
131}
132
/// Fixes HTML comment closing fences to prevent content loss.
///
/// According to CommonMark, HTML block type 2 (comments) ends with the line containing `-->`.
/// This means any text on the same line after `-->` is included in the HTML block and would
/// be discarded by markdown parsers that ignore HTML blocks.
///
/// This function inserts a newline after every `-->` that is followed by non-whitespace
/// content on the same line, so the trailing text is parsed as regular markdown. Spaces and
/// tabs between `-->` and the trailing text are dropped: if they were kept, four or more
/// columns of indentation at the start of the new line would turn the text into an indented
/// code block instead of regular markdown.
///
/// The scan is purely textual: any `-->` is treated as a closing fence, whether or not a
/// matching `<!--` precedes it.
///
/// # Examples
///
/// ```
/// use quillmark_core::normalize::fix_html_comment_fences;
///
/// // Text on same line as --> is moved to next line
/// assert_eq!(
///     fix_html_comment_fences("<!-- comment -->Some text"),
///     "<!-- comment -->\nSome text"
/// );
///
/// // Already on separate line - no change
/// assert_eq!(
///     fix_html_comment_fences("<!-- comment -->\nSome text"),
///     "<!-- comment -->\nSome text"
/// );
///
/// // Only whitespace after --> - no change needed
/// assert_eq!(
///     fix_html_comment_fences("<!-- comment --> \nSome text"),
///     "<!-- comment --> \nSome text"
/// );
///
/// // Multi-line comments with trailing text
/// assert_eq!(
///     fix_html_comment_fences("<!--\nmultiline\n-->Trailing text"),
///     "<!--\nmultiline\n-->\nTrailing text"
/// );
///
/// // Indentation in front of the trailing text is dropped so the moved
/// // text cannot become an indented code block
/// assert_eq!(
///     fix_html_comment_fences("<!-- comment -->    Some text"),
///     "<!-- comment -->\nSome text"
/// );
/// ```
pub fn fix_html_comment_fences(s: &str) -> String {
    const FENCE: &str = "-->";

    // Early return if no HTML comment closing fence is present.
    if !s.contains(FENCE) {
        return s.to_string();
    }

    // Extra capacity for the newlines that may be inserted.
    let mut result = String::with_capacity(s.len() + 16);
    let mut remaining = s;

    while let Some(fence_pos) = remaining.find(FENCE) {
        // Copy everything up to and including the fence.
        let after_fence = fence_pos + FENCE.len();
        result.push_str(&remaining[..after_fence]);
        remaining = &remaining[after_fence..];

        // Whatever shares the fence's line. `trim` also removes a trailing
        // '\r', so CRLF line endings count as "nothing after the fence".
        let rest_of_line = remaining.split('\n').next().unwrap_or("");

        if !rest_of_line.trim().is_empty() {
            // Real content follows the fence: break the line here and drop
            // the indentation in front of that content so it starts the new
            // line at column 0.
            result.push('\n');
            remaining = remaining.trim_start_matches(|c: char| c == ' ' || c == '\t');
        }
    }

    // Append whatever follows the last fence.
    result.push_str(remaining);

    result
}
219
220/// Normalizes markdown content by applying all preprocessing steps.
221///
222/// This function applies normalizations in the correct order:
223/// 1. Strip Unicode bidirectional formatting characters
224/// 2. Fix HTML comment closing fences (ensure text after `-->` is preserved)
225///
226/// Note: Guillemet preprocessing (`<<text>>` → `«text»`) is handled separately
227/// in [`normalize_fields`] because it needs to be applied after schema defaults
228/// and coercion.
229///
230/// # Examples
231///
232/// ```
233/// use quillmark_core::normalize::normalize_markdown;
234///
235/// // Bidi characters are stripped
236/// let input = "**bold** \u{202D}**more**";
237/// let normalized = normalize_markdown(input);
238/// assert_eq!(normalized, "**bold** **more**");
239///
240/// // HTML comment trailing text is preserved
241/// let with_comment = "<!-- comment -->Some text";
242/// let normalized = normalize_markdown(with_comment);
243/// assert_eq!(normalized, "<!-- comment -->\nSome text");
244/// ```
245pub fn normalize_markdown(markdown: &str) -> String {
246 let cleaned = strip_bidi_formatting(markdown);
247 fix_html_comment_fences(&cleaned)
248}
249
250/// Normalizes a string value by stripping bidi characters and optionally processing guillemets.
251///
252/// - For body content: applies `preprocess_markdown_guillemets` (converts `<<text>>` to `«text»`)
253/// and `fix_html_comment_fences` to preserve text after `-->`
254/// - For other fields: applies `strip_chevrons` (removes chevrons entirely)
255fn normalize_string(s: &str, is_body: bool) -> String {
256 // First strip bidi formatting characters
257 let cleaned = strip_bidi_formatting(s);
258
259 // Then apply content-specific normalization
260 if is_body {
261 // Fix HTML comment fences first, then convert guillemets
262 let fixed = fix_html_comment_fences(&cleaned);
263 preprocess_markdown_guillemets(&fixed)
264 } else {
265 strip_chevrons(&cleaned)
266 }
267}
268
269/// Recursively normalize a JSON value with depth tracking.
270///
271/// Returns an error if nesting exceeds MAX_NESTING_DEPTH to prevent stack overflow.
272fn normalize_json_value_inner(
273 value: serde_json::Value,
274 is_body: bool,
275 depth: usize,
276) -> Result<serde_json::Value, NormalizationError> {
277 if depth > MAX_NESTING_DEPTH {
278 return Err(NormalizationError::NestingTooDeep {
279 depth,
280 max: MAX_NESTING_DEPTH,
281 });
282 }
283
284 match value {
285 serde_json::Value::String(s) => {
286 Ok(serde_json::Value::String(normalize_string(&s, is_body)))
287 }
288 serde_json::Value::Array(arr) => {
289 let normalized: Result<Vec<_>, _> = arr
290 .into_iter()
291 .map(|v| normalize_json_value_inner(v, false, depth + 1))
292 .collect();
293 Ok(serde_json::Value::Array(normalized?))
294 }
295 serde_json::Value::Object(map) => {
296 let processed: Result<serde_json::Map<String, serde_json::Value>, _> = map
297 .into_iter()
298 .map(|(k, v)| {
299 let is_body = k == BODY_FIELD;
300 normalize_json_value_inner(v, is_body, depth + 1).map(|nv| (k, nv))
301 })
302 .collect();
303 Ok(serde_json::Value::Object(processed?))
304 }
305 // Pass through other types unchanged (numbers, booleans, null)
306 other => Ok(other),
307 }
308}
309
310/// Recursively normalize a JSON value.
311///
312/// This is a convenience wrapper that starts depth tracking at 0.
313/// Logs a warning and returns the original value if depth is exceeded.
314fn normalize_json_value(value: serde_json::Value, is_body: bool) -> serde_json::Value {
315 match normalize_json_value_inner(value.clone(), is_body, 0) {
316 Ok(normalized) => normalized,
317 Err(e) => {
318 // Log warning but don't fail - return original value
319 eprintln!("Warning: {}", e);
320 value
321 }
322 }
323}
324
325/// Normalizes document fields by applying all preprocessing steps.
326///
327/// This function orchestrates input normalization for document fields:
328/// 1. Strips Unicode bidirectional formatting characters from all string values
329/// 2. For the body field: converts `<<text>>` to `«text»` (guillemets)
330/// 3. For other fields: strips chevrons entirely (`<<text>>` → `text`)
331///
332/// # Processing Order
333///
334/// The normalization order is important:
335/// 1. **Bidi stripping** - Must happen first so markdown delimiters are recognized
336/// 2. **Guillemet preprocessing** - Converts user syntax to internal markers
337///
338/// # Examples
339///
340/// ```
341/// use quillmark_core::normalize::normalize_fields;
342/// use quillmark_core::QuillValue;
343/// use std::collections::HashMap;
344///
345/// let mut fields = HashMap::new();
346/// fields.insert("title".to_string(), QuillValue::from_json(serde_json::json!("<<hello>>")));
347/// fields.insert("body".to_string(), QuillValue::from_json(serde_json::json!("**bold** \u{202D}**more**")));
348///
349/// let result = normalize_fields(fields);
350///
351/// // Title has chevrons stripped
352/// assert_eq!(result.get("title").unwrap().as_str().unwrap(), "hello");
353///
354/// // Body has bidi chars stripped (guillemet would apply if there were any <<>>)
355/// assert_eq!(result.get("body").unwrap().as_str().unwrap(), "**bold** **more**");
356/// ```
357pub fn normalize_fields(fields: HashMap<String, QuillValue>) -> HashMap<String, QuillValue> {
358 fields
359 .into_iter()
360 .map(|(key, value)| {
361 let json = value.into_json();
362 let processed = normalize_json_value(json, key == BODY_FIELD);
363 (key, QuillValue::from_json(processed))
364 })
365 .collect()
366}
367
/// Unit tests for the normalization helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // Tests for strip_bidi_formatting

    #[test]
    fn test_strip_bidi_no_change() {
        assert_eq!(strip_bidi_formatting("hello world"), "hello world");
        assert_eq!(strip_bidi_formatting(""), "");
        assert_eq!(strip_bidi_formatting("**bold** text"), "**bold** text");
    }

    #[test]
    fn test_strip_bidi_lro() {
        // U+202D (LEFT-TO-RIGHT OVERRIDE)
        assert_eq!(strip_bidi_formatting("he\u{202D}llo"), "hello");
        assert_eq!(
            strip_bidi_formatting("**asdf** or \u{202D}**(1234**"),
            "**asdf** or **(1234**"
        );
    }

    #[test]
    fn test_strip_bidi_rlo() {
        // U+202E (RIGHT-TO-LEFT OVERRIDE)
        assert_eq!(strip_bidi_formatting("he\u{202E}llo"), "hello");
    }

    #[test]
    fn test_strip_bidi_marks() {
        // U+200E (LRM) and U+200F (RLM)
        assert_eq!(strip_bidi_formatting("a\u{200E}b\u{200F}c"), "abc");
    }

    #[test]
    fn test_strip_bidi_embeddings() {
        // U+202A (LRE), U+202B (RLE), U+202C (PDF)
        assert_eq!(
            strip_bidi_formatting("\u{202A}text\u{202B}more\u{202C}"),
            "textmore"
        );
    }

    #[test]
    fn test_strip_bidi_isolates() {
        // U+2066 (LRI), U+2067 (RLI), U+2068 (FSI), U+2069 (PDI)
        assert_eq!(
            strip_bidi_formatting("\u{2066}a\u{2067}b\u{2068}c\u{2069}"),
            "abc"
        );
    }

    #[test]
    fn test_strip_bidi_all_chars() {
        let all_bidi = "\u{200E}\u{200F}\u{202A}\u{202B}\u{202C}\u{202D}\u{202E}\u{2066}\u{2067}\u{2068}\u{2069}";
        assert_eq!(strip_bidi_formatting(all_bidi), "");
    }

    #[test]
    fn test_strip_bidi_unicode_preserved() {
        // Non-bidi unicode should be preserved
        assert_eq!(strip_bidi_formatting("你好世界"), "你好世界");
        assert_eq!(strip_bidi_formatting("مرحبا"), "مرحبا");
        assert_eq!(strip_bidi_formatting("🎉"), "🎉");
    }

    // Tests for normalize_markdown

    #[test]
    fn test_normalize_markdown_basic() {
        assert_eq!(normalize_markdown("hello"), "hello");
        assert_eq!(
            normalize_markdown("**bold** \u{202D}**more**"),
            "**bold** **more**"
        );
    }

    #[test]
    fn test_normalize_markdown_html_comment() {
        assert_eq!(
            normalize_markdown("<!-- comment -->Some text"),
            "<!-- comment -->\nSome text"
        );
    }

    // Tests for fix_html_comment_fences

    #[test]
    fn test_fix_html_comment_no_comment() {
        assert_eq!(fix_html_comment_fences("hello world"), "hello world");
        assert_eq!(fix_html_comment_fences("**bold** text"), "**bold** text");
        assert_eq!(fix_html_comment_fences(""), "");
    }

    #[test]
    fn test_fix_html_comment_single_line_trailing_text() {
        // Text on same line as --> should be moved to next line
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->Same line text"),
            "<!-- comment -->\nSame line text"
        );
    }

    #[test]
    fn test_fix_html_comment_already_newline() {
        // Already has newline after --> - no change
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->\nNext line text"),
            "<!-- comment -->\nNext line text"
        );
    }

    #[test]
    fn test_fix_html_comment_only_whitespace_after() {
        // Only whitespace after --> until newline - no change needed
        assert_eq!(
            fix_html_comment_fences("<!-- comment --> \nSome text"),
            "<!-- comment --> \nSome text"
        );
    }

    #[test]
    fn test_fix_html_comment_multiline_trailing_text() {
        // Multi-line comment with text on closing line
        assert_eq!(
            fix_html_comment_fences("<!--\nmultiline\ncomment\n-->Trailing text"),
            "<!--\nmultiline\ncomment\n-->\nTrailing text"
        );
    }

    #[test]
    fn test_fix_html_comment_multiline_proper() {
        // Multi-line comment with proper newline after -->
        assert_eq!(
            fix_html_comment_fences("<!--\nmultiline\n-->\n\nParagraph text"),
            "<!--\nmultiline\n-->\n\nParagraph text"
        );
    }

    #[test]
    fn test_fix_html_comment_multiple_comments() {
        // Multiple comments in the same document
        assert_eq!(
            fix_html_comment_fences("<!-- first -->Text\n\n<!-- second -->More text"),
            "<!-- first -->\nText\n\n<!-- second -->\nMore text"
        );
    }

    #[test]
    fn test_fix_html_comment_end_of_string() {
        // Comment at end of string - no trailing content
        assert_eq!(
            fix_html_comment_fences("Some text before <!-- comment -->"),
            "Some text before <!-- comment -->"
        );
    }

    #[test]
    fn test_fix_html_comment_only_comment() {
        // Just a comment with nothing after
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->"),
            "<!-- comment -->"
        );
    }

    #[test]
    fn test_fix_html_comment_arrow_not_comment() {
        // --> that's not part of a comment (standalone)
        // Still gets a newline if followed by non-whitespace (conservative approach)
        assert_eq!(fix_html_comment_fences("-->some text"), "-->\nsome text");
    }

    #[test]
    fn test_fix_html_comment_crlf() {
        // CRLF line endings
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->\r\nSome text"),
            "<!-- comment -->\r\nSome text"
        );
    }

    // Tests for normalize_fields

    #[test]
    fn test_normalize_fields_body_bidi() {
        let mut fields = HashMap::new();
        fields.insert(
            "body".to_string(),
            QuillValue::from_json(serde_json::json!("**bold** \u{202D}**more**")),
        );

        let result = normalize_fields(fields);
        assert_eq!(
            result.get("body").unwrap().as_str().unwrap(),
            "**bold** **more**"
        );
    }

    #[test]
    fn test_normalize_fields_body_guillemets() {
        let mut fields = HashMap::new();
        fields.insert(
            "body".to_string(),
            QuillValue::from_json(serde_json::json!("<<raw>>")),
        );

        let result = normalize_fields(fields);
        assert_eq!(result.get("body").unwrap().as_str().unwrap(), "«raw»");
    }

    #[test]
    fn test_normalize_fields_body_both() {
        let mut fields = HashMap::new();
        fields.insert(
            "body".to_string(),
            QuillValue::from_json(serde_json::json!("<<raw>> \u{202D}**bold**")),
        );

        let result = normalize_fields(fields);
        // Bidi stripped first, then guillemets converted
        assert_eq!(
            result.get("body").unwrap().as_str().unwrap(),
            "«raw» **bold**"
        );
    }

    #[test]
    fn test_normalize_fields_other_field_chevrons_stripped() {
        let mut fields = HashMap::new();
        fields.insert(
            "title".to_string(),
            QuillValue::from_json(serde_json::json!("<<hello>>")),
        );

        let result = normalize_fields(fields);
        assert_eq!(result.get("title").unwrap().as_str().unwrap(), "hello");
    }

    #[test]
    fn test_normalize_fields_other_field_bidi_stripped() {
        let mut fields = HashMap::new();
        fields.insert(
            "title".to_string(),
            QuillValue::from_json(serde_json::json!("he\u{202D}llo")),
        );

        let result = normalize_fields(fields);
        assert_eq!(result.get("title").unwrap().as_str().unwrap(), "hello");
    }

    #[test]
    fn test_normalize_fields_nested_values() {
        let mut fields = HashMap::new();
        fields.insert(
            "items".to_string(),
            QuillValue::from_json(serde_json::json!(["<<a>>", "\u{202D}b"])),
        );

        let result = normalize_fields(fields);
        let items = result.get("items").unwrap().as_array().unwrap();
        assert_eq!(items[0].as_str().unwrap(), "a");
        assert_eq!(items[1].as_str().unwrap(), "b");
    }

    #[test]
    fn test_normalize_fields_object_values() {
        let mut fields = HashMap::new();
        fields.insert(
            "meta".to_string(),
            QuillValue::from_json(serde_json::json!({
                "title": "<<hello>>",
                "body": "<<content>>"
            })),
        );

        let result = normalize_fields(fields);
        let meta = result.get("meta").unwrap();
        let meta_obj = meta.as_object().unwrap();
        // Nested "body" key should be recognized
        assert_eq!(meta_obj.get("title").unwrap().as_str().unwrap(), "hello");
        assert_eq!(meta_obj.get("body").unwrap().as_str().unwrap(), "«content»");
    }

    #[test]
    fn test_normalize_fields_non_string_unchanged() {
        let mut fields = HashMap::new();
        fields.insert(
            "count".to_string(),
            QuillValue::from_json(serde_json::json!(42)),
        );
        fields.insert(
            "enabled".to_string(),
            QuillValue::from_json(serde_json::json!(true)),
        );

        let result = normalize_fields(fields);
        assert_eq!(result.get("count").unwrap().as_i64().unwrap(), 42);
        assert!(result.get("enabled").unwrap().as_bool().unwrap());
    }

    // Tests for depth limiting

    #[test]
    fn test_normalize_json_value_inner_depth_exceeded() {
        // Create a deeply nested JSON structure that exceeds MAX_NESTING_DEPTH:
        // MAX_NESTING_DEPTH + 1 wrappers put the leaf at depth MAX_NESTING_DEPTH + 1.
        let mut value = serde_json::json!("leaf");
        for _ in 0..=super::MAX_NESTING_DEPTH {
            value = serde_json::json!([value]);
        }

        // The inner function should return an error
        let result = super::normalize_json_value_inner(value, false, 0);
        assert!(result.is_err());

        if let Err(NormalizationError::NestingTooDeep { depth, max }) = result {
            assert!(depth > max);
            assert_eq!(max, super::MAX_NESTING_DEPTH);
        } else {
            panic!("Expected NestingTooDeep error");
        }
    }

    #[test]
    fn test_normalize_json_value_inner_within_limit() {
        // Exactly MAX_NESTING_DEPTH wrappers put the leaf at depth
        // MAX_NESTING_DEPTH, the deepest value that is still accepted.
        let mut value = serde_json::json!("leaf");
        for _ in 0..super::MAX_NESTING_DEPTH {
            value = serde_json::json!([value]);
        }

        // This should succeed
        let result = super::normalize_json_value_inner(value, false, 0);
        assert!(result.is_ok());
    }

    #[test]
    fn test_normalize_json_value_depth_exceeded_returns_original() {
        // The leaf contains a bidi character so that a normalized result
        // would differ from the input.
        let mut value = serde_json::json!("\u{202D}leaf");
        for _ in 0..=super::MAX_NESTING_DEPTH {
            value = serde_json::json!([value]);
        }

        // The wrapper does not fail; it hands back the value untouched.
        let result = super::normalize_json_value(value.clone(), false);
        assert_eq!(result, value);
    }
}