Skip to main content

vimdoc_language_server/
formatter.rs

1use serde::Deserialize;
2
3use crate::parser::{Document, LineKind, SepKind};
4
5#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Deserialize)]
6#[serde(rename_all = "kebab-case")]
7pub enum ReflowMode {
8    #[default]
9    Always,
10    OnlyIfTooLong,
11    Never,
12}
13
14pub struct FormatOptions {
15    pub line_width: usize,
16    pub reflow: ReflowMode,
17    pub normalize_spacing: bool,
18}
19
20impl Default for FormatOptions {
21    fn default() -> Self {
22        Self {
23            line_width: 78,
24            reflow: ReflowMode::default(),
25            normalize_spacing: false,
26        }
27    }
28}
29
/// Measures `s` for layout purposes as its number of Unicode scalar values.
///
/// Every `char` counts as one column; wide or zero-width characters are not
/// treated specially.
pub(crate) fn display_width(s: &str) -> usize {
    s.chars().map(|_| 1usize).sum()
}
33
34#[must_use]
35pub fn format_document(text: &str, opts: &FormatOptions) -> String {
36    let doc = Document::parse(text);
37    let raw_lines: Vec<&str> = text.lines().collect();
38    let n = doc.lines.len();
39    let mut out = Vec::with_capacity(n);
40    let mut i = 0;
41
42    while i < n {
43        let pl = &doc.lines[i];
44        match &pl.kind {
45            LineKind::Blank => {
46                out.push(String::new());
47                i += 1;
48            }
49            LineKind::Separator(kind) => {
50                let ch = match kind {
51                    SepKind::Major => '=',
52                    SepKind::Minor => '-',
53                };
54                out.push(ch.to_string().repeat(opts.line_width));
55                i += 1;
56            }
57            LineKind::CodeBody => {
58                out.push(raw_lines[i].to_string());
59                i += 1;
60            }
61            LineKind::ListItem => {
62                out.push(raw_lines[i].trim_end().to_string());
63                i += 1;
64            }
65            LineKind::Text => {
66                if pl.tag_defs.is_empty() {
67                    let indent = leading_whitespace(raw_lines[i]);
68                    if indent.is_empty() {
69                        if raw_lines[i].contains('\t') {
70                            out.push(raw_lines[i].trim_end().to_string());
71                            i += 1;
72                        } else {
73                            match opts.reflow {
74                                ReflowMode::Never => {
75                                    out.push(raw_lines[i].trim_end().to_string());
76                                    i += 1;
77                                }
78                                ReflowMode::Always | ReflowMode::OnlyIfTooLong => {
79                                    i = emit_prose_paragraph(
80                                        &raw_lines, &doc, opts, i, n, &mut out,
81                                    );
82                                }
83                            }
84                        }
85                    } else {
86                        out.push(raw_lines[i].trim_end().to_string());
87                        i += 1;
88                    }
89                } else {
90                    out.push(format_heading(raw_lines[i], pl, opts.line_width));
91                    i += 1;
92                }
93            }
94        }
95    }
96
97    let mut result = out.join("\n");
98    if text.ends_with('\n') {
99        result.push('\n');
100    }
101    result
102}
103
104fn emit_prose_paragraph(
105    raw_lines: &[&str],
106    doc: &Document,
107    opts: &FormatOptions,
108    start: usize,
109    n: usize,
110    out: &mut Vec<String>,
111) -> usize {
112    let mut j = start;
113    while j < n
114        && doc.lines[j].kind == LineKind::Text
115        && doc.lines[j].tag_defs.is_empty()
116        && leading_whitespace(raw_lines[j]).is_empty()
117        && !raw_lines[j].contains('\t')
118    {
119        j += 1;
120    }
121    if opts.reflow == ReflowMode::OnlyIfTooLong
122        && raw_lines[start..j]
123            .iter()
124            .all(|l| display_width(l) <= opts.line_width)
125    {
126        for line in &raw_lines[start..j] {
127            out.push(line.trim_end().to_string());
128        }
129        return j;
130    }
131    let num_lines = j - start;
132    let mut tokens: Vec<(&str, usize)> = Vec::new();
133    let mut pending_space: usize = 0;
134    for (idx, line) in raw_lines[start..j].iter().enumerate() {
135        let is_last_line = idx == num_lines - 1;
136        let line_tokens = split_words_with_spacing(line);
137        let len = line_tokens.len();
138        for (k, (word, trailing)) in line_tokens.into_iter().enumerate() {
139            tokens.push((word, pending_space));
140            pending_space = if opts.normalize_spacing || (!is_last_line && k == len - 1) {
141                1
142            } else {
143                trailing
144            };
145        }
146    }
147    reflow_tokens(&tokens, opts.line_width, out);
148    j
149}
150
/// Converts a column counted in UTF-16 code units into a byte offset in `s`.
///
/// Returns the byte offset of the first character that starts at or after
/// UTF-16 column `utf16`; a column falling inside a surrogate pair therefore
/// resolves to the following character. Columns past the end of the string
/// map to `s.len()`.
pub(crate) fn utf16_col_to_byte(s: &str, utf16: usize) -> usize {
    s.char_indices()
        // Pair every character's byte offset with the UTF-16 column at which
        // it begins.
        .scan(0usize, |units, (offset, ch)| {
            let starts_at = *units;
            *units += ch.len_utf16();
            Some((offset, starts_at))
        })
        .find(|&(_, starts_at)| starts_at >= utf16)
        .map_or(s.len(), |(offset, _)| offset)
}
161
162#[allow(clippy::cast_possible_truncation)]
163fn format_heading(raw: &str, pl: &crate::parser::ParsedLine, line_width: usize) -> String {
164    let tag_start_utf16 = pl.tag_defs[0].range.start.character as usize;
165    let tag_start = utf16_col_to_byte(raw, tag_start_utf16);
166
167    if tag_start == 0 {
168        return raw.trim_end().to_string();
169    }
170
171    let left = raw[..tag_start].trim_end();
172    let right: String = pl
173        .tag_defs
174        .iter()
175        .map(|s| format!("*{}*", s.name))
176        .collect::<Vec<_>>()
177        .join(" ");
178
179    if display_width(left) + 1 + display_width(&right) >= line_width {
180        return format!("{left} {right}");
181    }
182
183    let spaces = line_width - display_width(left) - display_width(&right);
184    format!("{left}{}{right}", " ".repeat(spaces))
185}
186
/// Returns the prefix of `s` made up solely of spaces and tabs.
///
/// The result is empty when `s` starts with any other character and is the
/// whole string when `s` contains nothing but spaces and tabs.
fn leading_whitespace(s: &str) -> &str {
    let indent_len = s
        .find(|c: char| c != ' ' && c != '\t')
        .unwrap_or(s.len());
    &s[..indent_len]
}
191
/// Splits `s` into words separated by spaces or tabs.
///
/// Each entry pairs a word with the number of space/tab characters that
/// immediately follow it (zero for a word that ends the string). Leading
/// whitespace is skipped and not reported.
fn split_words_with_spacing(s: &str) -> Vec<(&str, usize)> {
    fn is_gap(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let mut words = Vec::new();
    let mut rest = s.trim_start_matches(is_gap);
    while !rest.is_empty() {
        let word_len = rest.find(is_gap).unwrap_or(rest.len());
        let (word, tail) = rest.split_at(word_len);
        let next = tail.trim_start_matches(is_gap);
        // Spaces and tabs are single bytes, so the byte difference is the
        // count of separator characters after the word.
        words.push((word, tail.len() - next.len()));
        rest = next;
    }
    words
}
216
217fn reflow_tokens(tokens: &[(&str, usize)], line_width: usize, out: &mut Vec<String>) {
218    if tokens.is_empty() {
219        return;
220    }
221    let mut line = String::new();
222    for (word, pre_space) in tokens {
223        let pre_space = *pre_space;
224        if line.is_empty() {
225            line.push_str(word);
226        } else if display_width(&line) + 1 + display_width(word) <= line_width {
227            let sp = pre_space.min(line_width - display_width(&line) - display_width(word));
228            for _ in 0..sp {
229                line.push(' ');
230            }
231            line.push_str(word);
232        } else {
233            out.push(line);
234            line = word.to_string();
235        }
236    }
237    if !line.is_empty() {
238        out.push(line);
239    }
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Formats `input` with [`FormatOptions::default`].
    fn format_default(input: &str) -> String {
        format_document(input, &FormatOptions::default())
    }

    /// Asserts that default formatting returns `input` byte-for-byte.
    fn assert_unchanged(input: &str) {
        assert_eq!(format_default(input), input);
    }

    /// Default options with only the line width overridden.
    fn with_width(line_width: usize) -> FormatOptions {
        FormatOptions {
            line_width,
            ..Default::default()
        }
    }

    /// Default options with only the reflow mode overridden.
    fn with_reflow(reflow: ReflowMode) -> FormatOptions {
        FormatOptions {
            reflow,
            ..Default::default()
        }
    }

    #[test]
    fn normalizes_major_separator() {
        let formatted = format_default(&"=".repeat(40));
        assert_eq!(formatted.trim_end(), "=".repeat(78).as_str());
    }

    #[test]
    fn normalizes_minor_separator() {
        let formatted = format_default(&"-".repeat(40));
        assert_eq!(formatted.trim_end(), "-".repeat(78).as_str());
    }

    #[test]
    fn reflows_prose() {
        assert_eq!(
            format_default("word1 word2\nword3 word4"),
            "word1 word2 word3 word4"
        );
    }

    #[test]
    fn preserves_code_block() {
        let formatted = format_default("example >\n    indented code\n<\nafter");
        assert!(formatted.contains("    indented code"));
    }

    #[test]
    fn idempotent_separator() {
        let input = format!("{}\n", "=".repeat(78));
        let once = format_default(&input);
        let twice = format_default(&once);
        assert_eq!(once, twice);
    }

    #[test]
    fn aligns_heading_tag_right() {
        let formatted = format_document("Introduction *intro*\n", &with_width(30));
        assert_eq!(formatted, "Introduction           *intro*\n");
    }

    #[test]
    fn heading_tag_at_column_zero_preserved() {
        let formatted = format_document("*intro* Introduction\n", &with_width(30));
        assert_eq!(formatted, "*intro* Introduction\n");
    }

    #[test]
    fn preserves_code_fence_with_language() {
        assert_unchanged("prose\n>lua\n    code()\n<\nafter\n");
    }

    #[test]
    fn prose_not_merged_into_code_fence() {
        assert_unchanged("This is prose.\n>lua\n    code()\n<\n");
    }

    #[test]
    fn heading_tag_fallback_when_line_too_long() {
        let formatted = format_document("A very long heading        *tag*\n", &with_width(20));
        assert_eq!(formatted, "A very long heading *tag*\n");
    }

    #[test]
    fn list_items_not_merged() {
        assert_unchanged("- item 1\n- item 2\n- item 3\n");
    }

    #[test]
    fn list_item_not_merged_with_preceding_prose() {
        assert_unchanged("Prose intro.\n- Item.\n");
    }

    #[test]
    fn asterisk_list_item_preserved() {
        assert_unchanged("* item text\n");
    }

    #[test]
    fn tab_command_ref_preserved() {
        assert_unchanged("CTRL-V\t\tInsert next non-digit literally.\n");
    }

    #[test]
    fn tab_line_not_merged_with_adjacent_prose() {
        assert_unchanged("Prose before.\nCTRL-V\t\tDescription.\nProse after.\n");
    }

    #[test]
    fn tab_idempotent() {
        let input = "CTRL-V\t\tInsert next non-digit literally.\n\t\tcontinuation line.\n";
        let once = format_default(input);
        let twice = format_default(&once);
        assert_eq!(once, twice);
    }

    #[test]
    fn ordered_list_items_not_merged() {
        assert_unchanged("1. First item\n2. Second item\n3. Third item\n");
    }

    #[test]
    fn ordered_list_not_merged_with_prose() {
        assert_unchanged("Intro text.\n1. First item\n2. Second item\n");
    }

    #[test]
    fn double_space_after_period_preserved() {
        assert_unchanged("First sentence.  Second sentence.\n");
    }

    #[test]
    fn double_space_preserved_during_reflow() {
        assert_unchanged("The quick brown fox.  The lazy dog sat.\n");
    }

    #[test]
    fn line_break_joins_with_single_space() {
        assert_eq!(
            format_default("word1 word2\nword3 word4"),
            "word1 word2 word3 word4"
        );
    }

    #[test]
    fn multi_space_internal_preserved() {
        assert_unchanged("Vi      \"the original\".\n");
    }

    #[test]
    fn reflow_never_preserves_line_breaks() {
        let input = "word1 word2\nword3 word4";
        let formatted = format_document(input, &with_reflow(ReflowMode::Never));
        assert_eq!(formatted, input);
    }

    #[test]
    fn reflow_only_if_too_long_skips_short_paragraph() {
        let input = "Short line.\nAnother short line.\n";
        let formatted = format_document(input, &with_reflow(ReflowMode::OnlyIfTooLong));
        assert_eq!(formatted, input);
    }

    #[test]
    fn reflow_only_if_too_long_reflows_overlong_paragraph() {
        let input = format!("{}\n", "word ".repeat(20).trim_end());
        let formatted = format_document(&input, &with_reflow(ReflowMode::OnlyIfTooLong));
        assert_ne!(formatted, input);
        assert!(formatted.lines().all(|l| l.len() <= 78));
    }

    #[test]
    fn normalize_spacing_collapses_double_space() {
        let opts = FormatOptions {
            normalize_spacing: true,
            ..Default::default()
        };
        let formatted = format_document("First sentence.  Second sentence.\n", &opts);
        assert_eq!(formatted, "First sentence. Second sentence.\n");
    }
}