1use serde::Deserialize;
2
3use crate::parser::{Document, LineKind, SepKind};
4
5#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Deserialize)]
6#[serde(rename_all = "kebab-case")]
7pub enum ReflowMode {
8 #[default]
9 Always,
10 OnlyIfTooLong,
11 Never,
12}
13
14pub struct FormatOptions {
15 pub line_width: usize,
16 pub reflow: ReflowMode,
17 pub normalize_spacing: bool,
18}
19
20impl Default for FormatOptions {
21 fn default() -> Self {
22 Self {
23 line_width: 78,
24 reflow: ReflowMode::default(),
25 normalize_spacing: false,
26 }
27 }
28}
29
/// Returns the width of `s` measured in Unicode scalar values (`char`s).
///
/// This is a character count, not a terminal column count: double-width and
/// combining characters each contribute exactly one.
pub(crate) fn display_width(s: &str) -> usize {
    s.chars().fold(0, |width, _| width + 1)
}
33
34#[must_use]
35pub fn format_document(text: &str, opts: &FormatOptions) -> String {
36 let doc = Document::parse(text);
37 let raw_lines: Vec<&str> = text.lines().collect();
38 let n = doc.lines.len();
39 let mut out = Vec::with_capacity(n);
40 let mut i = 0;
41
42 while i < n {
43 let pl = &doc.lines[i];
44 match &pl.kind {
45 LineKind::Blank => {
46 out.push(String::new());
47 i += 1;
48 }
49 LineKind::Separator(kind) => {
50 let ch = match kind {
51 SepKind::Major => '=',
52 SepKind::Minor => '-',
53 };
54 out.push(ch.to_string().repeat(opts.line_width));
55 i += 1;
56 }
57 LineKind::CodeBody => {
58 out.push(raw_lines[i].to_string());
59 i += 1;
60 }
61 LineKind::ListItem => {
62 out.push(raw_lines[i].trim_end().to_string());
63 i += 1;
64 }
65 LineKind::Text => {
66 if pl.tag_defs.is_empty() {
67 let indent = leading_whitespace(raw_lines[i]);
68 if indent.is_empty() {
69 if raw_lines[i].contains('\t') || is_pipe_table_row(raw_lines[i]) {
70 out.push(raw_lines[i].trim_end().to_string());
71 i += 1;
72 } else {
73 match opts.reflow {
74 ReflowMode::Never => {
75 out.push(raw_lines[i].trim_end().to_string());
76 i += 1;
77 }
78 ReflowMode::Always | ReflowMode::OnlyIfTooLong => {
79 i = emit_prose_paragraph(
80 &raw_lines, &doc, opts, i, n, &mut out,
81 );
82 }
83 }
84 }
85 } else {
86 out.push(raw_lines[i].trim_end().to_string());
87 i += 1;
88 }
89 } else {
90 out.push(format_heading(raw_lines[i], pl, opts.line_width));
91 i += 1;
92 }
93 }
94 }
95 }
96
97 let mut result = out.join("\n");
98 if text.ends_with('\n') {
99 result.push('\n');
100 }
101 result
102}
103
104fn emit_prose_paragraph(
105 raw_lines: &[&str],
106 doc: &Document,
107 opts: &FormatOptions,
108 start: usize,
109 n: usize,
110 out: &mut Vec<String>,
111) -> usize {
112 let mut j = start;
113 while j < n
114 && doc.lines[j].kind == LineKind::Text
115 && doc.lines[j].tag_defs.is_empty()
116 && leading_whitespace(raw_lines[j]).is_empty()
117 && !raw_lines[j].contains('\t')
118 && !is_pipe_table_row(raw_lines[j])
119 {
120 j += 1;
121 }
122 if opts.reflow == ReflowMode::OnlyIfTooLong
123 && raw_lines[start..j]
124 .iter()
125 .all(|l| display_width(l) <= opts.line_width)
126 {
127 for line in &raw_lines[start..j] {
128 out.push(line.trim_end().to_string());
129 }
130 return j;
131 }
132 let num_lines = j - start;
133 let mut tokens: Vec<(&str, usize)> = Vec::new();
134 let mut pending_space: usize = 0;
135 for (idx, line) in raw_lines[start..j].iter().enumerate() {
136 let is_last_line = idx == num_lines - 1;
137 let line_tokens = split_words_with_spacing(line);
138 let len = line_tokens.len();
139 for (k, (word, trailing)) in line_tokens.into_iter().enumerate() {
140 tokens.push((word, pending_space));
141 pending_space = if opts.normalize_spacing || (!is_last_line && k == len - 1) {
142 1
143 } else {
144 trailing
145 };
146 }
147 }
148 reflow_tokens(&tokens, opts.line_width, out);
149 j
150}
151
/// Converts a column counted in UTF-16 code units into a byte offset in `s`.
///
/// Returns the byte offset of the first character that starts at or after
/// `utf16` code units, so a column falling inside a surrogate pair resolves
/// to the following character. Columns at or past the end of the string map
/// to `s.len()`.
pub(crate) fn utf16_col_to_byte(s: &str, utf16: usize) -> usize {
    let mut units_seen = 0usize;
    s.char_indices()
        .find_map(|(offset, ch)| {
            if units_seen >= utf16 {
                Some(offset)
            } else {
                units_seen += ch.len_utf16();
                None
            }
        })
        .unwrap_or(s.len())
}
162
163#[allow(clippy::cast_possible_truncation)]
164fn format_heading(raw: &str, pl: &crate::parser::ParsedLine, line_width: usize) -> String {
165 let tag_start_utf16 = pl.tag_defs[0].range.start.character as usize;
166 let tag_start = utf16_col_to_byte(raw, tag_start_utf16);
167
168 if tag_start == 0 {
169 return raw.trim_end().to_string();
170 }
171
172 let left = raw[..tag_start].trim_end();
173 let right: String = pl
174 .tag_defs
175 .iter()
176 .map(|s| format!("*{}*", s.name))
177 .collect::<Vec<_>>()
178 .join(" ");
179
180 if display_width(left) + 1 + display_width(&right) >= line_width {
181 return format!("{left} {right}");
182 }
183
184 let spaces = line_width - display_width(left) - display_width(&right);
185 format!("{left}{}{right}", " ".repeat(spaces))
186}
187
/// Returns the prefix of `s` made up solely of spaces and tabs (possibly
/// empty). Other whitespace characters are not treated as indentation.
fn leading_whitespace(s: &str) -> &str {
    let indent_len = s
        .find(|c: char| c != ' ' && c != '\t')
        .unwrap_or(s.len());
    &s[..indent_len]
}
192
/// Reports whether `s` looks like a pipe-table row: ignoring trailing
/// whitespace, it starts with `|`, ends with `|`, and those are two distinct
/// characters (a lone `|` does not count).
fn is_pipe_table_row(s: &str) -> bool {
    let row = s.trim_end();
    // After removing the opening pipe, a closing pipe must still remain.
    matches!(row.strip_prefix('|'), Some(rest) if rest.ends_with('|'))
}
197
/// Splits `s` into words separated by spaces and tabs.
///
/// Each entry pairs a word with the number of space/tab characters that
/// directly follow it (0 for a word that ends the string). Leading
/// whitespace is skipped and not reported.
fn split_words_with_spacing(s: &str) -> Vec<(&str, usize)> {
    let is_gap = |c: char| c == ' ' || c == '\t';

    let mut words = Vec::new();
    let mut rest = s.trim_start_matches(is_gap);
    while !rest.is_empty() {
        let word_len = rest.find(is_gap).unwrap_or(rest.len());
        let (word, tail) = rest.split_at(word_len);
        let after_gap = tail.trim_start_matches(is_gap);
        // Spaces and tabs are one byte each, so the byte difference is the
        // number of separator characters.
        words.push((word, tail.len() - after_gap.len()));
        rest = after_gap;
    }
    words
}
222
223fn reflow_tokens(tokens: &[(&str, usize)], line_width: usize, out: &mut Vec<String>) {
224 if tokens.is_empty() {
225 return;
226 }
227 let mut line = String::new();
228 for (word, pre_space) in tokens {
229 let pre_space = *pre_space;
230 if line.is_empty() {
231 line.push_str(word);
232 } else if display_width(&line) + 1 + display_width(word) <= line_width {
233 let sp = pre_space.min(line_width - display_width(&line) - display_width(word));
234 for _ in 0..sp {
235 line.push(' ');
236 }
237 line.push_str(word);
238 } else {
239 out.push(line);
240 line = word.to_string();
241 }
242 }
243 if !line.is_empty() {
244 out.push(line);
245 }
246}
247
#[cfg(test)]
mod tests {
    // End-to-end tests that drive `format_document` with small documents.
    //
    // Several cases depend on exact runs of spaces inside string literals
    // (double spaces, heading padding, table alignment). Where the count
    // matters it is spelled out in a comment or built with `repeat`.
    use super::*;

    #[test]
    fn normalizes_major_separator() {
        let result = format_document(&"=".repeat(40), &FormatOptions::default());
        assert_eq!(result.trim_end(), &"=".repeat(78));
    }

    #[test]
    fn normalizes_minor_separator() {
        let result = format_document(&"-".repeat(40), &FormatOptions::default());
        assert_eq!(result.trim_end(), &"-".repeat(78));
    }

    #[test]
    fn reflows_prose() {
        let input = "word1 word2\nword3 word4";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, "word1 word2 word3 word4");
    }

    #[test]
    fn preserves_code_block() {
        let input = "example >\n indented code\n<\nafter";
        let result = format_document(input, &FormatOptions::default());
        assert!(result.contains(" indented code"));
    }

    #[test]
    fn idempotent_separator() {
        let input = format!("{}\n", "=".repeat(78));
        let once = format_document(&input, &FormatOptions::default());
        let twice = format_document(&once, &FormatOptions::default());
        assert_eq!(once, twice);
    }

    #[test]
    fn aligns_heading_tag_right() {
        let opts = FormatOptions {
            line_width: 30,
            ..Default::default()
        };
        let result = format_document("Introduction *intro*\n", &opts);
        // 12 characters of title + 11 spaces of padding + 7 characters of tag
        // fill the 30-character line exactly.
        let expected = format!("Introduction{}*intro*\n", " ".repeat(11));
        assert_eq!(result, expected);
    }

    #[test]
    fn heading_tag_at_column_zero_preserved() {
        let opts = FormatOptions {
            line_width: 30,
            ..Default::default()
        };
        let result = format_document("*intro* Introduction\n", &opts);
        assert_eq!(result, "*intro* Introduction\n");
    }

    #[test]
    fn preserves_code_fence_with_language() {
        let input = "prose\n>lua\n code()\n<\nafter\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn prose_not_merged_into_code_fence() {
        let input = "This is prose.\n>lua\n code()\n<\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn heading_tag_fallback_when_line_too_long() {
        let opts = FormatOptions {
            line_width: 20,
            ..Default::default()
        };
        // Title and tag cannot fit in 20 characters, so a single space is used.
        let result = format_document("A very long heading *tag*\n", &opts);
        assert_eq!(result, "A very long heading *tag*\n");
    }

    #[test]
    fn list_items_not_merged() {
        let input = "- item 1\n- item 2\n- item 3\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn list_item_not_merged_with_preceding_prose() {
        let input = "Prose intro.\n- Item.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn asterisk_list_item_preserved() {
        let input = "* item text\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn tab_command_ref_preserved() {
        let input = "CTRL-V\t\tInsert next non-digit literally.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn tab_line_not_merged_with_adjacent_prose() {
        let input = "Prose before.\nCTRL-V\t\tDescription.\nProse after.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn tab_idempotent() {
        let input = "CTRL-V\t\tInsert next non-digit literally.\n\t\tcontinuation line.\n";
        let once = format_document(input, &FormatOptions::default());
        let twice = format_document(&once, &FormatOptions::default());
        assert_eq!(once, twice);
    }

    #[test]
    fn ordered_list_items_not_merged() {
        let input = "1. First item\n2. Second item\n3. Third item\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn ordered_list_not_merged_with_prose() {
        let input = "Intro text.\n1. First item\n2. Second item\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn double_space_after_period_preserved() {
        // Two spaces follow the first period.
        let input = "First sentence.  Second sentence.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn double_space_preserved_during_reflow() {
        // Two spaces follow "fox.".
        let input = "The quick brown fox.  The lazy dog sat.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn line_break_joins_with_single_space() {
        let input = "word1 word2\nword3 word4";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, "word1 word2 word3 word4");
    }

    #[test]
    fn multi_space_internal_preserved() {
        // Three spaces separate "Vi" from the quotation.
        let input = "Vi   \"the original\".\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn reflow_never_preserves_line_breaks() {
        let input = "word1 word2\nword3 word4";
        let opts = FormatOptions {
            reflow: ReflowMode::Never,
            ..Default::default()
        };
        let result = format_document(input, &opts);
        assert_eq!(result, input);
    }

    #[test]
    fn reflow_only_if_too_long_skips_short_paragraph() {
        let input = "Short line.\nAnother short line.\n";
        let opts = FormatOptions {
            reflow: ReflowMode::OnlyIfTooLong,
            ..Default::default()
        };
        let result = format_document(input, &opts);
        assert_eq!(result, input);
    }

    #[test]
    fn reflow_only_if_too_long_reflows_overlong_paragraph() {
        let input = format!("{}\n", "word ".repeat(20).trim_end());
        let opts = FormatOptions {
            reflow: ReflowMode::OnlyIfTooLong,
            ..Default::default()
        };
        let result = format_document(&input, &opts);
        assert_ne!(result, input);
        assert!(result.lines().all(|l| l.len() <= 78));
    }

    // In the table fixtures below the rows must start at column 0: the `\`
    // after the opening quote only swallows the first line break, so any
    // indentation on later rows would become part of the string.

    #[test]
    fn pipe_table_padded_preserved() {
        let input = "\
| Command   | List           |
| --------  | -------------- |
| `files`   | find or fd     |
| `buffers` | open buffers   |
";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn pipe_table_tight_preserved() {
        let input = "\
|Prefix     |Behavior                           |
|-----------|-----------------------------------|
|`no prefix`|Files                              |
|`$`        |Buffers                            |
";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn pipe_table_not_merged_with_adjacent_prose() {
        let input = "\
Prose before the table.

| Command  | List       |
| -------- | ---------- |
| `files`  | find or fd |

Prose after the table.
";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn pipe_table_idempotent() {
        let input = "\
| Key       | Command           | Key       | Command           |
| ----------| ------------------| ----------| ------------------|
| `<C-\\>`   | buffers           | `<C-p>`   | files             |
";
        let once = format_document(input, &FormatOptions::default());
        let twice = format_document(&once, &FormatOptions::default());
        assert_eq!(once, twice);
    }

    #[test]
    fn pipe_table_prose_after_not_blocked() {
        let input = "\
| Col | Val |

word1 word2
word3 word4
";
        let result = format_document(input, &FormatOptions::default());
        assert!(result.contains("| Col | Val |"));
        assert!(result.contains("word1 word2 word3 word4"));
    }

    #[test]
    fn normalize_spacing_collapses_double_space() {
        // Two spaces in the input, one in the expected output.
        let input = "First sentence.  Second sentence.\n";
        let opts = FormatOptions {
            normalize_spacing: true,
            ..Default::default()
        };
        let result = format_document(input, &opts);
        assert_eq!(result, "First sentence. Second sentence.\n");
    }
}