quillmark_core/
normalize.rs

1//! # Input Normalization
2//!
3//! This module provides input normalization for markdown content before parsing.
4//! Normalization ensures that invisible control characters and other artifacts
5//! that can interfere with markdown parsing are handled consistently.
6//!
7//! ## Overview
8//!
9//! Input text may contain invisible Unicode characters (especially from copy-paste)
10//! that interfere with markdown parsing. This module provides functions to:
11//!
12//! - Strip Unicode bidirectional formatting characters that break delimiter recognition
13//! - Orchestrate guillemet preprocessing (`<<text>>` → `«text»`)
14//! - Apply all normalizations in the correct order
15//!
16//! ## Functions
17//!
//! - [`strip_bidi_formatting`] - Remove Unicode bidi control characters
//! - [`fix_html_comment_fences`] - Move text that follows `-->` onto its own line
//! - [`normalize_markdown`] - Apply all markdown-specific normalizations
//! - [`normalize_fields`] - Normalize document fields (bidi + guillemets)
21//!
22//! ## Why Normalize?
23//!
24//! Unicode bidirectional formatting characters (LRO, RLO, LRE, RLE, etc.) are invisible
25//! control characters used for bidirectional text layout. When placed adjacent to markdown
26//! delimiters like `**`, they can prevent parsers from recognizing the delimiters:
27//!
28//! ```text
29//! **bold** or <U+202D>**(1234**
30//!             ^^^^^^^^ invisible LRO here prevents second ** from being recognized as bold
31//! ```
32//!
33//! These characters commonly appear when copying text from:
34//! - Web pages with mixed LTR/RTL content
35//! - PDF documents
36//! - Word processors
37//! - Some clipboard managers
38//!
39//! ## Examples
40//!
41//! ```
42//! use quillmark_core::normalize::strip_bidi_formatting;
43//!
44//! // Input with invisible U+202D (LRO) before second **
45//! let input = "**asdf** or \u{202D}**(1234**";
46//! let cleaned = strip_bidi_formatting(input);
47//! assert_eq!(cleaned, "**asdf** or **(1234**");
48//! ```
49
50use crate::guillemet::{preprocess_markdown_guillemets, strip_chevrons};
51use crate::parse::BODY_FIELD;
52use crate::value::QuillValue;
53use std::collections::HashMap;
54
/// Maximum nesting depth for JSON value normalization to prevent stack overflow.
///
/// Depth starts at 0 for the value handed to the normalizer and grows by one for
/// every array or object level entered. Any value found deeper than this limit
/// makes the recursive normalizer return `NormalizationError::NestingTooDeep`.
const MAX_NESTING_DEPTH: usize = 100;
57
/// Errors that can occur during normalization.
///
/// Returned by the recursive JSON normalizer. `normalize_json_value` does not
/// propagate it: it prints the error to stderr and keeps the original value.
#[derive(Debug, thiserror::Error)]
pub enum NormalizationError {
    /// JSON nesting depth exceeded the maximum allowed (`MAX_NESTING_DEPTH`)
    #[error("JSON nesting too deep: {depth} levels (max: {max} levels)")]
    NestingTooDeep {
        /// Depth at which the limit was exceeded
        depth: usize,
        /// Maximum allowed depth
        max: usize,
    },
}
70
/// Returns `true` when `c` is one of the Unicode bidirectional formatting controls
/// this module strips.
///
/// The recognised characters form three contiguous code-point runs:
///
/// - U+200E..=U+200F: LRM, RLM (directional marks)
/// - U+202A..=U+202E: LRE, RLE, PDF, LRO, RLO (embeddings and overrides)
/// - U+2066..=U+2069: LRI, RLI, FSI, PDI (isolates)
#[inline]
fn is_bidi_char(c: char) -> bool {
    match c {
        // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
        '\u{200E}'..='\u{200F}' => true,
        // Embeddings (LRE, RLE), POP DIRECTIONAL FORMATTING, overrides (LRO, RLO)
        '\u{202A}'..='\u{202E}' => true,
        // Isolates (LRI, RLI, FSI) and POP DIRECTIONAL ISOLATE
        '\u{2066}'..='\u{2069}' => true,
        _ => false,
    }
}
89
90/// Strips Unicode bidirectional formatting characters that can interfere with markdown parsing.
91///
92/// These invisible control characters are used for bidirectional text layout but can
93/// break markdown delimiter recognition when placed adjacent to `**`, `*`, `_`, etc.
94///
95/// # Characters Stripped
96///
97/// - U+200E (LEFT-TO-RIGHT MARK, LRM)
98/// - U+200F (RIGHT-TO-LEFT MARK, RLM)
99/// - U+202A (LEFT-TO-RIGHT EMBEDDING, LRE)
100/// - U+202B (RIGHT-TO-LEFT EMBEDDING, RLE)
101/// - U+202C (POP DIRECTIONAL FORMATTING, PDF)
102/// - U+202D (LEFT-TO-RIGHT OVERRIDE, LRO)
103/// - U+202E (RIGHT-TO-LEFT OVERRIDE, RLO)
104/// - U+2066 (LEFT-TO-RIGHT ISOLATE, LRI)
105/// - U+2067 (RIGHT-TO-LEFT ISOLATE, RLI)
106/// - U+2068 (FIRST STRONG ISOLATE, FSI)
107/// - U+2069 (POP DIRECTIONAL ISOLATE, PDI)
108///
109/// # Examples
110///
111/// ```
112/// use quillmark_core::normalize::strip_bidi_formatting;
113///
114/// // Normal text is unchanged
115/// assert_eq!(strip_bidi_formatting("hello"), "hello");
116///
117/// // LRO character is stripped
118/// assert_eq!(strip_bidi_formatting("he\u{202D}llo"), "hello");
119///
120/// // All bidi characters are stripped
121/// let input = "\u{200E}\u{200F}\u{202A}\u{202B}\u{202C}\u{202D}\u{202E}";
122/// assert_eq!(strip_bidi_formatting(input), "");
123/// ```
124pub fn strip_bidi_formatting(s: &str) -> String {
125    // Early return optimization: avoid allocation if no bidi characters present
126    if !s.chars().any(is_bidi_char) {
127        return s.to_string();
128    }
129
130    s.chars().filter(|c| !is_bidi_char(*c)).collect()
131}
132
/// Fixes HTML comment closing fences to prevent content loss.
///
/// According to CommonMark, HTML block type 2 (comments) ends with the line containing `-->`.
/// This means any text on the same line after `-->` is included in the HTML block and would
/// be discarded by markdown parsers that ignore HTML blocks.
///
/// This function inserts a newline after `-->` when followed by non-whitespace content,
/// ensuring the trailing text is parsed as regular markdown.
///
/// # Behavior notes
///
/// - Every occurrence of `-->` is treated as a closing fence, whether or not a
///   matching `<!--` precedes it (conservative approach).
/// - The inserted line break is always `\n`, regardless of the line endings used
///   elsewhere in the input.
/// - Nothing is inserted when only whitespace (or nothing at all) sits between
///   `-->` and the end of its line.
///
/// # Examples
///
/// ```
/// use quillmark_core::normalize::fix_html_comment_fences;
///
/// // Text on same line as --> is moved to next line
/// assert_eq!(
///     fix_html_comment_fences("<!-- comment -->Some text"),
///     "<!-- comment -->\nSome text"
/// );
///
/// // Already on separate line - no change
/// assert_eq!(
///     fix_html_comment_fences("<!-- comment -->\nSome text"),
///     "<!-- comment -->\nSome text"
/// );
///
/// // Only whitespace after --> - no change needed
/// assert_eq!(
///     fix_html_comment_fences("<!-- comment -->   \nSome text"),
///     "<!-- comment -->   \nSome text"
/// );
///
/// // Multi-line comments with trailing text
/// assert_eq!(
///     fix_html_comment_fences("<!--\nmultiline\n-->Trailing text"),
///     "<!--\nmultiline\n-->\nTrailing text"
/// );
/// ```
pub fn fix_html_comment_fences(s: &str) -> String {
    const FENCE: &str = "-->";

    // Early return if no HTML comment closing fence present
    if !s.contains(FENCE) {
        return s.to_string();
    }

    let mut result = String::with_capacity(s.len() + 16); // Extra capacity for potential newlines
    let mut remaining = s;

    while let Some(fence_pos) = remaining.find(FENCE) {
        // Copy everything up to and including "-->"
        let (through_fence, rest) = remaining.split_at(fence_pos + FENCE.len());
        result.push_str(through_fence);

        // Look only as far as the first character that decides the outcome: a
        // line feed (the line ends cleanly) or a non-whitespace character (there
        // is trailing text). Stopping there, rather than searching for the next
        // newline, keeps the work linear when many fences share one long line.
        // `\r` counts as whitespace, so CRLF endings are handled by the same rule.
        let first_significant = rest.chars().find(|&c| c == '\n' || !c.is_whitespace());
        if matches!(first_significant, Some(c) if c != '\n') {
            // Non-whitespace content after -->, insert newline
            result.push('\n');
        }

        remaining = rest;
    }

    // Append whatever follows the last fence
    result.push_str(remaining);

    result
}
219
220/// Normalizes markdown content by applying all preprocessing steps.
221///
222/// This function applies normalizations in the correct order:
223/// 1. Strip Unicode bidirectional formatting characters
224/// 2. Fix HTML comment closing fences (ensure text after `-->` is preserved)
225///
226/// Note: Guillemet preprocessing (`<<text>>` → `«text»`) is handled separately
227/// in [`normalize_fields`] because it needs to be applied after schema defaults
228/// and coercion.
229///
230/// # Examples
231///
232/// ```
233/// use quillmark_core::normalize::normalize_markdown;
234///
235/// // Bidi characters are stripped
236/// let input = "**bold** \u{202D}**more**";
237/// let normalized = normalize_markdown(input);
238/// assert_eq!(normalized, "**bold** **more**");
239///
240/// // HTML comment trailing text is preserved
241/// let with_comment = "<!-- comment -->Some text";
242/// let normalized = normalize_markdown(with_comment);
243/// assert_eq!(normalized, "<!-- comment -->\nSome text");
244/// ```
245pub fn normalize_markdown(markdown: &str) -> String {
246    let cleaned = strip_bidi_formatting(markdown);
247    fix_html_comment_fences(&cleaned)
248}
249
250/// Normalizes a string value by stripping bidi characters and optionally processing guillemets.
251///
252/// - For body content: applies `preprocess_markdown_guillemets` (converts `<<text>>` to `«text»`)
253///   and `fix_html_comment_fences` to preserve text after `-->`
254/// - For other fields: applies `strip_chevrons` (removes chevrons entirely)
255fn normalize_string(s: &str, is_body: bool) -> String {
256    // First strip bidi formatting characters
257    let cleaned = strip_bidi_formatting(s);
258
259    // Then apply content-specific normalization
260    if is_body {
261        // Fix HTML comment fences first, then convert guillemets
262        let fixed = fix_html_comment_fences(&cleaned);
263        preprocess_markdown_guillemets(&fixed)
264    } else {
265        strip_chevrons(&cleaned)
266    }
267}
268
269/// Recursively normalize a JSON value with depth tracking.
270///
271/// Returns an error if nesting exceeds MAX_NESTING_DEPTH to prevent stack overflow.
272fn normalize_json_value_inner(
273    value: serde_json::Value,
274    is_body: bool,
275    depth: usize,
276) -> Result<serde_json::Value, NormalizationError> {
277    if depth > MAX_NESTING_DEPTH {
278        return Err(NormalizationError::NestingTooDeep {
279            depth,
280            max: MAX_NESTING_DEPTH,
281        });
282    }
283
284    match value {
285        serde_json::Value::String(s) => {
286            Ok(serde_json::Value::String(normalize_string(&s, is_body)))
287        }
288        serde_json::Value::Array(arr) => {
289            let normalized: Result<Vec<_>, _> = arr
290                .into_iter()
291                .map(|v| normalize_json_value_inner(v, false, depth + 1))
292                .collect();
293            Ok(serde_json::Value::Array(normalized?))
294        }
295        serde_json::Value::Object(map) => {
296            let processed: Result<serde_json::Map<String, serde_json::Value>, _> = map
297                .into_iter()
298                .map(|(k, v)| {
299                    let is_body = k == BODY_FIELD;
300                    normalize_json_value_inner(v, is_body, depth + 1).map(|nv| (k, nv))
301                })
302                .collect();
303            Ok(serde_json::Value::Object(processed?))
304        }
305        // Pass through other types unchanged (numbers, booleans, null)
306        other => Ok(other),
307    }
308}
309
310/// Recursively normalize a JSON value.
311///
312/// This is a convenience wrapper that starts depth tracking at 0.
313/// Logs a warning and returns the original value if depth is exceeded.
314fn normalize_json_value(value: serde_json::Value, is_body: bool) -> serde_json::Value {
315    match normalize_json_value_inner(value.clone(), is_body, 0) {
316        Ok(normalized) => normalized,
317        Err(e) => {
318            // Log warning but don't fail - return original value
319            eprintln!("Warning: {}", e);
320            value
321        }
322    }
323}
324
325/// Normalizes document fields by applying all preprocessing steps.
326///
327/// This function orchestrates input normalization for document fields:
328/// 1. Strips Unicode bidirectional formatting characters from all string values
329/// 2. For the body field: converts `<<text>>` to `«text»` (guillemets)
330/// 3. For other fields: strips chevrons entirely (`<<text>>` → `text`)
331///
332/// # Processing Order
333///
334/// The normalization order is important:
335/// 1. **Bidi stripping** - Must happen first so markdown delimiters are recognized
336/// 2. **Guillemet preprocessing** - Converts user syntax to internal markers
337///
338/// # Examples
339///
340/// ```
341/// use quillmark_core::normalize::normalize_fields;
342/// use quillmark_core::QuillValue;
343/// use std::collections::HashMap;
344///
345/// let mut fields = HashMap::new();
346/// fields.insert("title".to_string(), QuillValue::from_json(serde_json::json!("<<hello>>")));
347/// fields.insert("body".to_string(), QuillValue::from_json(serde_json::json!("**bold** \u{202D}**more**")));
348///
349/// let result = normalize_fields(fields);
350///
351/// // Title has chevrons stripped
352/// assert_eq!(result.get("title").unwrap().as_str().unwrap(), "hello");
353///
354/// // Body has bidi chars stripped (guillemet would apply if there were any <<>>)
355/// assert_eq!(result.get("body").unwrap().as_str().unwrap(), "**bold** **more**");
356/// ```
357pub fn normalize_fields(fields: HashMap<String, QuillValue>) -> HashMap<String, QuillValue> {
358    fields
359        .into_iter()
360        .map(|(key, value)| {
361            let json = value.into_json();
362            let processed = normalize_json_value(json, key == BODY_FIELD);
363            (key, QuillValue::from_json(processed))
364        })
365        .collect()
366}
367
#[cfg(test)]
mod tests {
    use super::*;

    // Shared helpers

    /// Normalizes a map holding the single entry `key` → `json` and returns the
    /// value stored under `key` afterwards.
    fn normalize_single(key: &str, json: serde_json::Value) -> QuillValue {
        let mut fields = HashMap::new();
        fields.insert(key.to_string(), QuillValue::from_json(json));
        normalize_fields(fields)
            .remove(key)
            .expect("normalize_fields must preserve field keys")
    }

    /// Wraps a string leaf in `levels` nested arrays, so the leaf sits at depth `levels`
    /// when normalization starts at depth 0.
    fn nested_arrays(levels: usize) -> serde_json::Value {
        (0..levels).fold(serde_json::json!("leaf"), |inner, _| {
            serde_json::Value::Array(vec![inner])
        })
    }

    // Tests for strip_bidi_formatting

    #[test]
    fn test_strip_bidi_no_change() {
        assert_eq!(strip_bidi_formatting("hello world"), "hello world");
        assert_eq!(strip_bidi_formatting(""), "");
        assert_eq!(strip_bidi_formatting("**bold** text"), "**bold** text");
    }

    #[test]
    fn test_strip_bidi_lro() {
        // U+202D (LEFT-TO-RIGHT OVERRIDE)
        assert_eq!(strip_bidi_formatting("he\u{202D}llo"), "hello");
        assert_eq!(
            strip_bidi_formatting("**asdf** or \u{202D}**(1234**"),
            "**asdf** or **(1234**"
        );
    }

    #[test]
    fn test_strip_bidi_rlo() {
        // U+202E (RIGHT-TO-LEFT OVERRIDE)
        assert_eq!(strip_bidi_formatting("he\u{202E}llo"), "hello");
    }

    #[test]
    fn test_strip_bidi_marks() {
        // U+200E (LRM) and U+200F (RLM)
        assert_eq!(strip_bidi_formatting("a\u{200E}b\u{200F}c"), "abc");
    }

    #[test]
    fn test_strip_bidi_embeddings() {
        // U+202A (LRE), U+202B (RLE), U+202C (PDF)
        assert_eq!(
            strip_bidi_formatting("\u{202A}text\u{202B}more\u{202C}"),
            "textmore"
        );
    }

    #[test]
    fn test_strip_bidi_isolates() {
        // U+2066 (LRI), U+2067 (RLI), U+2068 (FSI), U+2069 (PDI)
        assert_eq!(
            strip_bidi_formatting("\u{2066}a\u{2067}b\u{2068}c\u{2069}"),
            "abc"
        );
    }

    #[test]
    fn test_strip_bidi_all_chars() {
        let all_bidi = "\u{200E}\u{200F}\u{202A}\u{202B}\u{202C}\u{202D}\u{202E}\u{2066}\u{2067}\u{2068}\u{2069}";
        assert_eq!(strip_bidi_formatting(all_bidi), "");
    }

    #[test]
    fn test_strip_bidi_unicode_preserved() {
        // Non-bidi unicode should be preserved
        assert_eq!(strip_bidi_formatting("你好世界"), "你好世界");
        assert_eq!(strip_bidi_formatting("مرحبا"), "مرحبا");
        assert_eq!(strip_bidi_formatting("🎉"), "🎉");
    }

    // Tests for normalize_markdown

    #[test]
    fn test_normalize_markdown_basic() {
        assert_eq!(normalize_markdown("hello"), "hello");
        assert_eq!(
            normalize_markdown("**bold** \u{202D}**more**"),
            "**bold** **more**"
        );
    }

    #[test]
    fn test_normalize_markdown_html_comment() {
        assert_eq!(
            normalize_markdown("<!-- comment -->Some text"),
            "<!-- comment -->\nSome text"
        );
    }

    // Tests for fix_html_comment_fences

    #[test]
    fn test_fix_html_comment_no_comment() {
        assert_eq!(fix_html_comment_fences("hello world"), "hello world");
        assert_eq!(fix_html_comment_fences("**bold** text"), "**bold** text");
        assert_eq!(fix_html_comment_fences(""), "");
    }

    #[test]
    fn test_fix_html_comment_single_line_trailing_text() {
        // Text on same line as --> should be moved to next line
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->Same line text"),
            "<!-- comment -->\nSame line text"
        );
    }

    #[test]
    fn test_fix_html_comment_already_newline() {
        // Already has newline after --> - no change
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->\nNext line text"),
            "<!-- comment -->\nNext line text"
        );
    }

    #[test]
    fn test_fix_html_comment_only_whitespace_after() {
        // Only whitespace after --> until newline - no change needed
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->   \nSome text"),
            "<!-- comment -->   \nSome text"
        );
    }

    #[test]
    fn test_fix_html_comment_multiline_trailing_text() {
        // Multi-line comment with text on closing line
        assert_eq!(
            fix_html_comment_fences("<!--\nmultiline\ncomment\n-->Trailing text"),
            "<!--\nmultiline\ncomment\n-->\nTrailing text"
        );
    }

    #[test]
    fn test_fix_html_comment_multiline_proper() {
        // Multi-line comment with proper newline after -->
        assert_eq!(
            fix_html_comment_fences("<!--\nmultiline\n-->\n\nParagraph text"),
            "<!--\nmultiline\n-->\n\nParagraph text"
        );
    }

    #[test]
    fn test_fix_html_comment_multiple_comments() {
        // Multiple comments in the same document
        assert_eq!(
            fix_html_comment_fences("<!-- first -->Text\n\n<!-- second -->More text"),
            "<!-- first -->\nText\n\n<!-- second -->\nMore text"
        );
    }

    #[test]
    fn test_fix_html_comment_end_of_string() {
        // Comment at end of string - no trailing content
        assert_eq!(
            fix_html_comment_fences("Some text before <!-- comment -->"),
            "Some text before <!-- comment -->"
        );
    }

    #[test]
    fn test_fix_html_comment_only_comment() {
        // Just a comment with nothing after
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->"),
            "<!-- comment -->"
        );
    }

    #[test]
    fn test_fix_html_comment_arrow_not_comment() {
        // --> that's not part of a comment (standalone)
        // Still gets a newline if followed by non-whitespace (conservative approach)
        assert_eq!(fix_html_comment_fences("-->some text"), "-->\nsome text");
    }

    #[test]
    fn test_fix_html_comment_crlf() {
        // CRLF line endings
        assert_eq!(
            fix_html_comment_fences("<!-- comment -->\r\nSome text"),
            "<!-- comment -->\r\nSome text"
        );
    }

    // Tests for normalize_fields

    #[test]
    fn test_normalize_fields_body_bidi() {
        let body = normalize_single("body", serde_json::json!("**bold** \u{202D}**more**"));
        assert_eq!(body.as_str().unwrap(), "**bold** **more**");
    }

    #[test]
    fn test_normalize_fields_body_guillemets() {
        let body = normalize_single("body", serde_json::json!("<<raw>>"));
        assert_eq!(body.as_str().unwrap(), "«raw»");
    }

    #[test]
    fn test_normalize_fields_body_both() {
        let body = normalize_single("body", serde_json::json!("<<raw>> \u{202D}**bold**"));
        // Bidi stripped first, then guillemets converted
        assert_eq!(body.as_str().unwrap(), "«raw» **bold**");
    }

    #[test]
    fn test_normalize_fields_other_field_chevrons_stripped() {
        let title = normalize_single("title", serde_json::json!("<<hello>>"));
        assert_eq!(title.as_str().unwrap(), "hello");
    }

    #[test]
    fn test_normalize_fields_other_field_bidi_stripped() {
        let title = normalize_single("title", serde_json::json!("he\u{202D}llo"));
        assert_eq!(title.as_str().unwrap(), "hello");
    }

    #[test]
    fn test_normalize_fields_nested_values() {
        let value = normalize_single("items", serde_json::json!(["<<a>>", "\u{202D}b"]));
        let items = value.as_array().unwrap();
        assert_eq!(items[0].as_str().unwrap(), "a");
        assert_eq!(items[1].as_str().unwrap(), "b");
    }

    #[test]
    fn test_normalize_fields_object_values() {
        let meta = normalize_single(
            "meta",
            serde_json::json!({
                "title": "<<hello>>",
                "body": "<<content>>"
            }),
        );
        let meta_obj = meta.as_object().unwrap();
        // Nested "body" key should be recognized
        assert_eq!(meta_obj.get("title").unwrap().as_str().unwrap(), "hello");
        assert_eq!(meta_obj.get("body").unwrap().as_str().unwrap(), "«content»");
    }

    #[test]
    fn test_normalize_fields_non_string_unchanged() {
        let mut fields = HashMap::new();
        fields.insert(
            "count".to_string(),
            QuillValue::from_json(serde_json::json!(42)),
        );
        fields.insert(
            "enabled".to_string(),
            QuillValue::from_json(serde_json::json!(true)),
        );

        let result = normalize_fields(fields);
        assert_eq!(result.get("count").unwrap().as_i64().unwrap(), 42);
        assert!(result.get("enabled").unwrap().as_bool().unwrap());
    }

    // Tests for depth limiting

    #[test]
    fn test_normalize_json_value_inner_depth_exceeded() {
        // One level more than allowed: the leaf ends up at depth MAX_NESTING_DEPTH + 1
        let value = nested_arrays(MAX_NESTING_DEPTH + 1);

        match normalize_json_value_inner(value, false, 0) {
            Err(NormalizationError::NestingTooDeep { depth, max }) => {
                assert!(depth > max);
                assert_eq!(max, MAX_NESTING_DEPTH);
                // The error is raised at the first depth past the limit
                assert_eq!(depth, MAX_NESTING_DEPTH + 1);
            }
            other => panic!("Expected NestingTooDeep error, got {:?}", other),
        }
    }

    #[test]
    fn test_normalize_json_value_inner_within_limit() {
        // A comfortably shallow structure and one sitting exactly on the limit
        // (leaf at depth MAX_NESTING_DEPTH) must both succeed.
        for levels in [50, MAX_NESTING_DEPTH].iter().copied() {
            let result = normalize_json_value_inner(nested_arrays(levels), false, 0);
            assert!(result.is_ok(), "nesting of {} levels should be accepted", levels);
        }
    }
}
quillmark_core/normalize.rs

quillmark_core/
normalize.rs