// vimdoc_language_server/formatter.rs

1use serde::Deserialize;
2
3use crate::parser::{Document, LineKind, SepKind};
4
/// Policy deciding when the formatter re-wraps prose paragraphs.
///
/// Deserialized from kebab-case names (`always`, `only-if-too-long`, `never`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum ReflowMode {
    /// Re-wrap every prose paragraph. This is the default.
    #[default]
    Always,
    /// Re-wrap a paragraph only when at least one of its lines is wider than the
    /// configured line width; otherwise only trailing whitespace is trimmed.
    OnlyIfTooLong,
    /// Keep prose line breaks as they are; only trailing whitespace is trimmed.
    Never,
}
13
14pub struct FormatOptions {
15    pub line_width: usize,
16    pub reflow: ReflowMode,
17    pub normalize_spacing: bool,
18}
19
20impl Default for FormatOptions {
21    fn default() -> Self {
22        Self {
23            line_width: 78,
24            reflow: ReflowMode::default(),
25            normalize_spacing: false,
26        }
27    }
28}
29
/// Width of `s` in columns as the formatter measures it.
///
/// Every `char` counts as exactly one column, so the result is the number of
/// Unicode scalar values in `s` (not its byte length).
pub(crate) fn display_width(s: &str) -> usize {
    s.char_indices().count()
}
33
34#[must_use]
35pub fn format_document(text: &str, opts: &FormatOptions) -> String {
36    let doc = Document::parse(text);
37    let raw_lines: Vec<&str> = text.lines().collect();
38    let n = doc.lines.len();
39    let mut out = Vec::with_capacity(n);
40    let mut i = 0;
41
42    while i < n {
43        let pl = &doc.lines[i];
44        match &pl.kind {
45            LineKind::Blank => {
46                out.push(String::new());
47                i += 1;
48            }
49            LineKind::Separator(kind) => {
50                let ch = match kind {
51                    SepKind::Major => '=',
52                    SepKind::Minor => '-',
53                };
54                out.push(ch.to_string().repeat(opts.line_width));
55                i += 1;
56            }
57            LineKind::CodeBody => {
58                out.push(raw_lines[i].to_string());
59                i += 1;
60            }
61            LineKind::ListItem => {
62                out.push(raw_lines[i].trim_end().to_string());
63                i += 1;
64            }
65            LineKind::Text => {
66                if pl.tag_defs.is_empty() {
67                    let indent = leading_whitespace(raw_lines[i]);
68                    if indent.is_empty() {
69                        if raw_lines[i].contains('\t') || is_pipe_table_row(raw_lines[i]) {
70                            out.push(raw_lines[i].trim_end().to_string());
71                            i += 1;
72                        } else {
73                            match opts.reflow {
74                                ReflowMode::Never => {
75                                    out.push(raw_lines[i].trim_end().to_string());
76                                    i += 1;
77                                }
78                                ReflowMode::Always | ReflowMode::OnlyIfTooLong => {
79                                    i = emit_prose_paragraph(
80                                        &raw_lines, &doc, opts, i, n, &mut out,
81                                    );
82                                }
83                            }
84                        }
85                    } else {
86                        out.push(raw_lines[i].trim_end().to_string());
87                        i += 1;
88                    }
89                } else {
90                    out.push(format_heading(raw_lines[i], pl, opts.line_width));
91                    i += 1;
92                }
93            }
94        }
95    }
96
97    let mut result = out.join("\n");
98    if text.ends_with('\n') {
99        result.push('\n');
100    }
101    result
102}
103
104fn emit_prose_paragraph(
105    raw_lines: &[&str],
106    doc: &Document,
107    opts: &FormatOptions,
108    start: usize,
109    n: usize,
110    out: &mut Vec<String>,
111) -> usize {
112    let mut j = start;
113    while j < n
114        && doc.lines[j].kind == LineKind::Text
115        && doc.lines[j].tag_defs.is_empty()
116        && leading_whitespace(raw_lines[j]).is_empty()
117        && !raw_lines[j].contains('\t')
118        && !is_pipe_table_row(raw_lines[j])
119    {
120        j += 1;
121    }
122    if opts.reflow == ReflowMode::OnlyIfTooLong
123        && raw_lines[start..j]
124            .iter()
125            .all(|l| display_width(l) <= opts.line_width)
126    {
127        for line in &raw_lines[start..j] {
128            out.push(line.trim_end().to_string());
129        }
130        return j;
131    }
132    let num_lines = j - start;
133    let mut tokens: Vec<(&str, usize)> = Vec::new();
134    let mut pending_space: usize = 0;
135    for (idx, line) in raw_lines[start..j].iter().enumerate() {
136        let is_last_line = idx == num_lines - 1;
137        let line_tokens = split_words_with_spacing(line);
138        let len = line_tokens.len();
139        for (k, (word, trailing)) in line_tokens.into_iter().enumerate() {
140            tokens.push((word, pending_space));
141            pending_space = if opts.normalize_spacing || (!is_last_line && k == len - 1) {
142                1
143            } else {
144                trailing
145            };
146        }
147    }
148    reflow_tokens(&tokens, opts.line_width, out);
149    j
150}
151
/// Converts a column counted in UTF-16 code units into a byte offset in `s`.
///
/// Returns the byte offset of the first character that starts at or after
/// `utf16` code units, or `s.len()` when the column lies past the end. A
/// column that falls inside a surrogate pair resolves to the character that
/// follows it, so the result is always a valid `char` boundary.
pub(crate) fn utf16_col_to_byte(s: &str, utf16: usize) -> usize {
    let mut units_seen = 0usize;
    s.char_indices()
        .find(|(_, ch)| {
            if units_seen >= utf16 {
                return true;
            }
            units_seen += ch.len_utf16();
            false
        })
        .map_or(s.len(), |(offset, _)| offset)
}
162
163#[allow(clippy::cast_possible_truncation)]
164fn format_heading(raw: &str, pl: &crate::parser::ParsedLine, line_width: usize) -> String {
165    let tag_start_utf16 = pl.tag_defs[0].range.start.character as usize;
166    let tag_start = utf16_col_to_byte(raw, tag_start_utf16);
167
168    if tag_start == 0 {
169        return raw.trim_end().to_string();
170    }
171
172    let left = raw[..tag_start].trim_end();
173    let right: String = pl
174        .tag_defs
175        .iter()
176        .map(|s| format!("*{}*", s.name))
177        .collect::<Vec<_>>()
178        .join(" ");
179
180    if display_width(left) + 1 + display_width(&right) >= line_width {
181        return format!("{left} {right}");
182    }
183
184    let spaces = line_width - display_width(left) - display_width(&right);
185    format!("{left}{}{right}", " ".repeat(spaces))
186}
187
/// Returns the leading run of spaces and tabs of `s` (possibly empty).
///
/// Only `' '` and `'\t'` count; other whitespace characters end the run.
fn leading_whitespace(s: &str) -> &str {
    match s.find(|c: char| c != ' ' && c != '\t') {
        Some(first_content) => &s[..first_content],
        // Nothing but spaces/tabs (or empty): the whole string is the run.
        None => s,
    }
}
192
/// Reports whether `s` looks like a pipe-table row.
///
/// Ignoring trailing whitespace, the line must both start and end with `|`
/// and be at least two characters long (a lone `|` does not qualify).
fn is_pipe_table_row(s: &str) -> bool {
    let row = s.trim_end();
    // Stripping the opening pipe first guarantees the closing pipe is a
    // different character.
    matches!(row.strip_prefix('|'), Some(rest) if rest.ends_with('|'))
}
197
/// Splits `s` into words, pairing each word with the number of space/tab
/// characters that immediately follow it.
///
/// Words are maximal runs of characters other than `' '` and `'\t'`. Leading
/// whitespace is skipped and not reported. The last word's count is the
/// length of any trailing whitespace (zero if there is none).
fn split_words_with_spacing(s: &str) -> Vec<(&str, usize)> {
    let is_gap = |c: char| c == ' ' || c == '\t';
    let mut pairs = Vec::new();
    let mut rest = s.trim_start_matches(is_gap);
    while !rest.is_empty() {
        let word_len = rest.find(is_gap).unwrap_or(rest.len());
        let (word, tail) = rest.split_at(word_len);
        let after_gap = tail.trim_start_matches(is_gap);
        // Spaces and tabs are one byte each, so the byte difference is the
        // number of gap characters.
        pairs.push((word, tail.len() - after_gap.len()));
        rest = after_gap;
    }
    pairs
}
222
223fn reflow_tokens(tokens: &[(&str, usize)], line_width: usize, out: &mut Vec<String>) {
224    if tokens.is_empty() {
225        return;
226    }
227    let mut line = String::new();
228    for (word, pre_space) in tokens {
229        let pre_space = *pre_space;
230        if line.is_empty() {
231            line.push_str(word);
232        } else if display_width(&line) + 1 + display_width(word) <= line_width {
233            let sp = pre_space.min(line_width - display_width(&line) - display_width(word));
234            for _ in 0..sp {
235                line.push(' ');
236            }
237            line.push_str(word);
238        } else {
239            out.push(line);
240            line = word.to_string();
241        }
242    }
243    if !line.is_empty() {
244        out.push(line);
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Formats `input` with the default options.
    fn fmt_default(input: &str) -> String {
        format_document(input, &FormatOptions::default())
    }

    /// Asserts that default formatting leaves `input` exactly as it is.
    #[track_caller]
    fn assert_unchanged(input: &str) {
        assert_eq!(fmt_default(input), input);
    }

    /// Asserts that a second default formatting pass changes nothing.
    #[track_caller]
    fn assert_idempotent(input: &str) {
        let once = fmt_default(input);
        let twice = fmt_default(&once);
        assert_eq!(once, twice);
    }

    /// Asserts that a 40-column separator made of `fill` is redrawn at 78 columns.
    #[track_caller]
    fn assert_separator_widened(fill: &str) {
        let result = fmt_default(&fill.repeat(40));
        assert_eq!(result.trim_end(), fill.repeat(78).as_str());
    }

    /// Default options with a custom line width.
    fn with_width(line_width: usize) -> FormatOptions {
        FormatOptions {
            line_width,
            ..Default::default()
        }
    }

    /// Default options with a custom reflow mode.
    fn with_reflow(reflow: ReflowMode) -> FormatOptions {
        FormatOptions {
            reflow,
            ..Default::default()
        }
    }

    // --- separators ---

    #[test]
    fn normalizes_major_separator() {
        assert_separator_widened("=");
    }

    #[test]
    fn normalizes_minor_separator() {
        assert_separator_widened("-");
    }

    #[test]
    fn idempotent_separator() {
        assert_idempotent(&format!("{}\n", "=".repeat(78)));
    }

    // --- prose reflow ---

    #[test]
    fn reflows_prose() {
        assert_eq!(
            fmt_default("word1 word2\nword3 word4"),
            "word1 word2 word3 word4"
        );
    }

    #[test]
    fn line_break_joins_with_single_space() {
        assert_eq!(
            fmt_default("word1 word2\nword3 word4"),
            "word1 word2 word3 word4"
        );
    }

    #[test]
    fn double_space_after_period_preserved() {
        assert_unchanged("First sentence.  Second sentence.\n");
    }

    #[test]
    fn double_space_preserved_during_reflow() {
        assert_unchanged("The quick brown fox.  The lazy dog sat.\n");
    }

    #[test]
    fn multi_space_internal_preserved() {
        assert_unchanged("Vi      \"the original\".\n");
    }

    #[test]
    fn normalize_spacing_collapses_double_space() {
        let opts = FormatOptions {
            normalize_spacing: true,
            ..Default::default()
        };
        assert_eq!(
            format_document("First sentence.  Second sentence.\n", &opts),
            "First sentence. Second sentence.\n"
        );
    }

    // --- reflow modes ---

    #[test]
    fn reflow_never_preserves_line_breaks() {
        let input = "word1 word2\nword3 word4";
        assert_eq!(
            format_document(input, &with_reflow(ReflowMode::Never)),
            input
        );
    }

    #[test]
    fn reflow_only_if_too_long_skips_short_paragraph() {
        let input = "Short line.\nAnother short line.\n";
        assert_eq!(
            format_document(input, &with_reflow(ReflowMode::OnlyIfTooLong)),
            input
        );
    }

    #[test]
    fn reflow_only_if_too_long_reflows_overlong_paragraph() {
        let input = format!("{}\n", "word ".repeat(20).trim_end());
        let result = format_document(&input, &with_reflow(ReflowMode::OnlyIfTooLong));
        assert_ne!(result, input);
        assert!(result.lines().all(|l| l.len() <= 78));
    }

    // --- code blocks ---

    #[test]
    fn preserves_code_block() {
        let result = fmt_default("example >\n    indented code\n<\nafter");
        assert!(result.contains("    indented code"));
    }

    #[test]
    fn preserves_code_fence_with_language() {
        assert_unchanged("prose\n>lua\n    code()\n<\nafter\n");
    }

    #[test]
    fn prose_not_merged_into_code_fence() {
        assert_unchanged("This is prose.\n>lua\n    code()\n<\n");
    }

    // --- headings ---

    #[test]
    fn aligns_heading_tag_right() {
        assert_eq!(
            format_document("Introduction *intro*\n", &with_width(30)),
            "Introduction           *intro*\n"
        );
    }

    #[test]
    fn heading_tag_at_column_zero_preserved() {
        assert_eq!(
            format_document("*intro* Introduction\n", &with_width(30)),
            "*intro* Introduction\n"
        );
    }

    #[test]
    fn heading_tag_fallback_when_line_too_long() {
        assert_eq!(
            format_document("A very long heading        *tag*\n", &with_width(20)),
            "A very long heading *tag*\n"
        );
    }

    // --- lists ---

    #[test]
    fn list_items_not_merged() {
        assert_unchanged("- item 1\n- item 2\n- item 3\n");
    }

    #[test]
    fn list_item_not_merged_with_preceding_prose() {
        assert_unchanged("Prose intro.\n- Item.\n");
    }

    #[test]
    fn asterisk_list_item_preserved() {
        assert_unchanged("* item text\n");
    }

    #[test]
    fn ordered_list_items_not_merged() {
        assert_unchanged("1. First item\n2. Second item\n3. Third item\n");
    }

    #[test]
    fn ordered_list_not_merged_with_prose() {
        assert_unchanged("Intro text.\n1. First item\n2. Second item\n");
    }

    // --- tab-aligned lines ---

    #[test]
    fn tab_command_ref_preserved() {
        assert_unchanged("CTRL-V\t\tInsert next non-digit literally.\n");
    }

    #[test]
    fn tab_line_not_merged_with_adjacent_prose() {
        assert_unchanged("Prose before.\nCTRL-V\t\tDescription.\nProse after.\n");
    }

    #[test]
    fn tab_idempotent() {
        assert_idempotent("CTRL-V\t\tInsert next non-digit literally.\n\t\tcontinuation line.\n");
    }

    // --- pipe tables ---

    #[test]
    fn pipe_table_padded_preserved() {
        assert_unchanged(
            "\
| Command  | List           |
| -------- | -------------- |
| `files`  | find or fd     |
| `buffers` | open buffers  |
",
        );
    }

    #[test]
    fn pipe_table_tight_preserved() {
        assert_unchanged(
            "\
|Prefix     |Behavior                           |
|-----------|-----------------------------------|
|`no prefix`|Files                              |
|`$`        |Buffers                            |
",
        );
    }

    #[test]
    fn pipe_table_not_merged_with_adjacent_prose() {
        assert_unchanged(
            "\
Prose before the table.

| Command  | List       |
| -------- | ---------- |
| `files`  | find or fd |

Prose after the table.
",
        );
    }

    #[test]
    fn pipe_table_idempotent() {
        assert_idempotent(
            "\
| Key       | Command           | Key       | Command           |
| ----------| ------------------| ----------| ------------------|
| `<C-\\>`    | buffers           | `<C-p>`     | files             |
",
        );
    }

    #[test]
    fn pipe_table_prose_after_not_blocked() {
        let result = fmt_default(
            "\
| Col | Val |

word1 word2
word3 word4
",
        );
        assert!(result.contains("| Col | Val |"));
        assert!(result.contains("word1 word2 word3 word4"));
    }
}
523}