Skip to main content

c12_parser/
format.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
/// Formatting details (an indentation sample and the surrounding
/// whitespace) recorded from the original text.
#[derive(Clone, Debug)]
pub struct FormatInfo {
    /// Leading portion of the original text kept for indentation
    /// detection, or `None` when no sample was taken.
    pub sample: Option<String>,
    /// Whitespace found at the very beginning of the original text
    /// (empty when there is none or when it is not preserved).
    pub whitespace_start: String,
    /// Whitespace found at the very end of the original text
    /// (empty when there is none or when it is not preserved).
    pub whitespace_end: String,
}
12
/// Settings that govern how formatting is detected and carried over.
#[derive(Clone, Debug)]
pub struct FormatOptions {
    /// Indent width to use when stringifying. Leave as `None` to have
    /// the indentation auto-detected from the original text (provided
    /// detection is enabled).
    pub indent: Option<usize>,

    /// Set to `false` to turn off auto-detection of indentation from
    /// the original text, even when a sample is available.
    pub preserve_indentation: bool,

    /// Set to `false` to stop leading and trailing whitespace around
    /// the value from being preserved.
    pub preserve_whitespace: bool,

    /// How many characters to take from the beginning of the text as
    /// the sample used for indentation detection.
    pub sample_size: usize,
}
32
33impl Default for FormatOptions {
34    fn default() -> Self {
35        Self {
36            indent: None,
37            preserve_indentation: true,
38            preserve_whitespace: true,
39            sample_size: 1024,
40        }
41    }
42}
43
44pub(crate) fn detect_format(text: &str, opts: &FormatOptions) -> FormatInfo {
45    let sample = if opts.indent.is_none() && opts.preserve_indentation {
46        Some(text.chars().take(opts.sample_size).collect::<String>())
47    } else {
48        None
49    };
50
51    static START_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\s+)").unwrap());
52    static END_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\s+)$").unwrap());
53
54    let (whitespace_start, whitespace_end) = if opts.preserve_whitespace {
55        let ws_start = START_RE
56            .captures(text)
57            .and_then(|c| c.get(0))
58            .map(|m| m.as_str().to_string())
59            .unwrap_or_default();
60        let ws_end = END_RE
61            .captures(text)
62            .and_then(|c| c.get(0))
63            .map(|m| m.as_str().to_string())
64            .unwrap_or_default();
65
66        (ws_start, ws_end)
67    } else {
68        (String::new(), String::new())
69    };
70
71    FormatInfo {
72        sample,
73        whitespace_start,
74        whitespace_end,
75    }
76}
77
78pub(crate) fn compute_indent(info: &FormatInfo, opts: &FormatOptions) -> usize {
79    if let Some(explicit) = opts.indent {
80        return explicit;
81    }
82
83    if let Some(sample) = &info.sample {
84        // Naive indent detection: find the first non-empty line and
85        // count its leading spaces.
86        for line in sample.lines() {
87            let trimmed = line.trim_start();
88            if trimmed.is_empty() {
89                continue;
90            }
91            let indent_len = line.len() - trimmed.len();
92            if indent_len > 0 {
93                return indent_len;
94            }
95        }
96    }
97
98    // Default indent size if nothing else is detected
99    2
100}
101
102/// A value bundled with its detected formatting information.
103#[derive(Clone, Debug)]
104pub struct Formatted<T> {
105    pub value: T,
106    pub format: FormatInfo,
107}
108
109impl<T> Formatted<T> {
110    pub fn new(text: &str, value: T, opts: &FormatOptions) -> Self {
111        let format = detect_format(text, opts);
112        Self { value, format }
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FormatInfo` that carries only an indentation sample.
    fn info_with_sample(sample: &str) -> FormatInfo {
        FormatInfo {
            sample: Some(sample.into()),
            whitespace_start: String::new(),
            whitespace_end: String::new(),
        }
    }

    #[test]
    fn detect_format_captures_outer_whitespace_and_sample() {
        let text = "\n  {\"a\": 1}\n\n";
        let opts = FormatOptions::default();
        let info = detect_format(text, &opts);

        // Leading whitespace is matched as one run anchored at the start of
        // the text, so the newline and the two spaces that follow it are
        // captured together.
        assert_eq!(info.whitespace_start, "\n  ");
        assert_eq!(info.whitespace_end, "\n\n");
        assert!(info.sample.is_some());
        assert!(info.sample.as_ref().unwrap().contains("{\"a\": 1}"));
    }

    #[test]
    fn detect_format_respects_preserve_flags() {
        let text = "   {\"a\": 1}   ";
        let opts = FormatOptions {
            preserve_whitespace: false,
            preserve_indentation: false,
            ..FormatOptions::default()
        };

        let info = detect_format(text, &opts);
        assert!(info.sample.is_none());
        assert!(info.whitespace_start.is_empty());
        assert!(info.whitespace_end.is_empty());
    }

    #[test]
    fn compute_indent_prefers_explicit_indent() {
        let info = info_with_sample("  key: 1");
        let opts = FormatOptions {
            indent: Some(4),
            ..FormatOptions::default()
        };

        assert_eq!(compute_indent(&info, &opts), 4);
    }

    #[test]
    fn compute_indent_detects_from_sample() {
        let info = info_with_sample("  key: 1\n    child: 2");
        let opts = FormatOptions::default();

        assert_eq!(compute_indent(&info, &opts), 2);
    }

    #[test]
    fn compute_indent_falls_back_to_default() {
        let info = info_with_sample("\n\n");
        let opts = FormatOptions::default();

        assert_eq!(compute_indent(&info, &opts), 2);
    }
}