1use serde::Deserialize;
2
3use crate::parser::{Document, LineKind, SepKind};
4
5#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Deserialize)]
6#[serde(rename_all = "kebab-case")]
7pub enum ReflowMode {
8 #[default]
9 Always,
10 OnlyIfTooLong,
11 Never,
12}
13
14pub struct FormatOptions {
15 pub line_width: usize,
16 pub reflow: ReflowMode,
17 pub normalize_spacing: bool,
18}
19
20impl Default for FormatOptions {
21 fn default() -> Self {
22 Self {
23 line_width: 78,
24 reflow: ReflowMode::default(),
25 normalize_spacing: false,
26 }
27 }
28}
29
/// Returns the width of `s`, measured as its number of `char`s.
///
/// Every Unicode scalar value counts as exactly one column.
pub(crate) fn display_width(s: &str) -> usize {
    s.chars().map(|_| 1_usize).sum()
}
33
34#[must_use]
35pub fn format_document(text: &str, opts: &FormatOptions) -> String {
36 let doc = Document::parse(text);
37 let raw_lines: Vec<&str> = text.lines().collect();
38 let n = doc.lines.len();
39 let mut out = Vec::with_capacity(n);
40 let mut i = 0;
41
42 while i < n {
43 let pl = &doc.lines[i];
44 match &pl.kind {
45 LineKind::Blank => {
46 out.push(String::new());
47 i += 1;
48 }
49 LineKind::Separator(kind) => {
50 let ch = match kind {
51 SepKind::Major => '=',
52 SepKind::Minor => '-',
53 };
54 out.push(ch.to_string().repeat(opts.line_width));
55 i += 1;
56 }
57 LineKind::CodeBody => {
58 out.push(raw_lines[i].to_string());
59 i += 1;
60 }
61 LineKind::ListItem => {
62 out.push(raw_lines[i].trim_end().to_string());
63 i += 1;
64 }
65 LineKind::Text => {
66 if pl.tag_defs.is_empty() {
67 let indent = leading_whitespace(raw_lines[i]);
68 if indent.is_empty() {
69 if raw_lines[i].contains('\t') {
70 out.push(raw_lines[i].trim_end().to_string());
71 i += 1;
72 } else {
73 match opts.reflow {
74 ReflowMode::Never => {
75 out.push(raw_lines[i].trim_end().to_string());
76 i += 1;
77 }
78 ReflowMode::Always | ReflowMode::OnlyIfTooLong => {
79 i = emit_prose_paragraph(
80 &raw_lines, &doc, opts, i, n, &mut out,
81 );
82 }
83 }
84 }
85 } else {
86 out.push(raw_lines[i].trim_end().to_string());
87 i += 1;
88 }
89 } else {
90 out.push(format_heading(raw_lines[i], pl, opts.line_width));
91 i += 1;
92 }
93 }
94 }
95 }
96
97 let mut result = out.join("\n");
98 if text.ends_with('\n') {
99 result.push('\n');
100 }
101 result
102}
103
104fn emit_prose_paragraph(
105 raw_lines: &[&str],
106 doc: &Document,
107 opts: &FormatOptions,
108 start: usize,
109 n: usize,
110 out: &mut Vec<String>,
111) -> usize {
112 let mut j = start;
113 while j < n
114 && doc.lines[j].kind == LineKind::Text
115 && doc.lines[j].tag_defs.is_empty()
116 && leading_whitespace(raw_lines[j]).is_empty()
117 && !raw_lines[j].contains('\t')
118 {
119 j += 1;
120 }
121 if opts.reflow == ReflowMode::OnlyIfTooLong
122 && raw_lines[start..j]
123 .iter()
124 .all(|l| display_width(l) <= opts.line_width)
125 {
126 for line in &raw_lines[start..j] {
127 out.push(line.trim_end().to_string());
128 }
129 return j;
130 }
131 let num_lines = j - start;
132 let mut tokens: Vec<(&str, usize)> = Vec::new();
133 let mut pending_space: usize = 0;
134 for (idx, line) in raw_lines[start..j].iter().enumerate() {
135 let is_last_line = idx == num_lines - 1;
136 let line_tokens = split_words_with_spacing(line);
137 let len = line_tokens.len();
138 for (k, (word, trailing)) in line_tokens.into_iter().enumerate() {
139 tokens.push((word, pending_space));
140 pending_space = if opts.normalize_spacing || (!is_last_line && k == len - 1) {
141 1
142 } else {
143 trailing
144 };
145 }
146 }
147 reflow_tokens(&tokens, opts.line_width, out);
148 j
149}
150
/// Converts a column counted in UTF-16 code units into a byte offset in `s`.
///
/// Returns the byte position of the first character at which the running
/// UTF-16 count has reached `utf16`. A column that falls inside a surrogate
/// pair therefore resolves to the following character. Columns at or past the
/// end of the string yield `s.len()`.
pub(crate) fn utf16_col_to_byte(s: &str, utf16: usize) -> usize {
    let mut units_seen = 0_usize;
    s.char_indices()
        .find(|&(_, ch)| {
            // Compare before counting this character, so the match is the
            // character that *starts* at or after the requested column.
            let reached = units_seen >= utf16;
            units_seen += ch.len_utf16();
            reached
        })
        .map_or(s.len(), |(byte_pos, _)| byte_pos)
}
161
162#[allow(clippy::cast_possible_truncation)]
163fn format_heading(raw: &str, pl: &crate::parser::ParsedLine, line_width: usize) -> String {
164 let tag_start_utf16 = pl.tag_defs[0].range.start.character as usize;
165 let tag_start = utf16_col_to_byte(raw, tag_start_utf16);
166
167 if tag_start == 0 {
168 return raw.trim_end().to_string();
169 }
170
171 let left = raw[..tag_start].trim_end();
172 let right: String = pl
173 .tag_defs
174 .iter()
175 .map(|s| format!("*{}*", s.name))
176 .collect::<Vec<_>>()
177 .join(" ");
178
179 if display_width(left) + 1 + display_width(&right) >= line_width {
180 return format!("{left} {right}");
181 }
182
183 let spaces = line_width - display_width(left) - display_width(&right);
184 format!("{left}{}{right}", " ".repeat(spaces))
185}
186
/// Returns the prefix of `s` made up of spaces and tabs.
///
/// The result is empty when `s` starts with any other character, and is the
/// whole string when `s` contains nothing but spaces and tabs.
fn leading_whitespace(s: &str) -> &str {
    let indent_len = s
        .find(|c: char| c != ' ' && c != '\t')
        .unwrap_or(s.len());
    &s[..indent_len]
}
191
/// Splits `s` into words, pairing each word with the amount of blank space
/// that follows it.
///
/// Words are separated by spaces and tabs only. The second element of each
/// pair is the number of space/tab characters between that word and the next
/// one, or up to the end of the string for the last word. Leading blanks are
/// skipped and not reported.
fn split_words_with_spacing(s: &str) -> Vec<(&str, usize)> {
    let is_blank = |c: char| c == ' ' || c == '\t';
    let mut pairs = Vec::new();
    let mut rest = s.trim_start_matches(is_blank);
    while !rest.is_empty() {
        let word_len = rest.find(is_blank).unwrap_or(rest.len());
        let (word, after_word) = rest.split_at(word_len);
        let next = after_word.trim_start_matches(is_blank);
        // Blanks are single-byte, so the byte difference is their count.
        pairs.push((word, after_word.len() - next.len()));
        rest = next;
    }
    pairs
}
216
217fn reflow_tokens(tokens: &[(&str, usize)], line_width: usize, out: &mut Vec<String>) {
218 if tokens.is_empty() {
219 return;
220 }
221 let mut line = String::new();
222 for (word, pre_space) in tokens {
223 let pre_space = *pre_space;
224 if line.is_empty() {
225 line.push_str(word);
226 } else if display_width(&line) + 1 + display_width(word) <= line_width {
227 let sp = pre_space.min(line_width - display_width(&line) - display_width(word));
228 for _ in 0..sp {
229 line.push(' ');
230 }
231 line.push_str(word);
232 } else {
233 out.push(line);
234 line = word.to_string();
235 }
236 }
237 if !line.is_empty() {
238 out.push(line);
239 }
240}
241
#[cfg(test)]
mod tests {
    //! End-to-end tests for `format_document`.
    //!
    //! Runs of significant spaces in literals are written as `\x20` escapes or
    //! built with `" ".repeat(..)`, so whitespace-collapsing tools cannot
    //! silently weaken a test.

    use super::*;

    #[test]
    fn normalizes_major_separator() {
        let result = format_document(&"=".repeat(40), &FormatOptions::default());
        assert_eq!(result.trim_end(), &"=".repeat(78));
    }

    #[test]
    fn normalizes_minor_separator() {
        let result = format_document(&"-".repeat(40), &FormatOptions::default());
        assert_eq!(result.trim_end(), &"-".repeat(78));
    }

    #[test]
    fn reflows_prose() {
        let input = "word1 word2\nword3 word4";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, "word1 word2 word3 word4");
    }

    #[test]
    fn preserves_code_block() {
        let input = "example >\n indented code\n<\nafter";
        let result = format_document(input, &FormatOptions::default());
        assert!(result.contains(" indented code"));
    }

    #[test]
    fn idempotent_separator() {
        let input = format!("{}\n", "=".repeat(78));
        let once = format_document(&input, &FormatOptions::default());
        let twice = format_document(&once, &FormatOptions::default());
        assert_eq!(once, twice);
    }

    #[test]
    fn aligns_heading_tag_right() {
        let opts = FormatOptions {
            line_width: 30,
            ..Default::default()
        };
        let result = format_document("Introduction *intro*\n", &opts);
        // 12 columns of text + 11 of padding + 7 of tag fill the 30 columns.
        let expected = format!("Introduction{}*intro*\n", " ".repeat(11));
        assert_eq!(result, expected);
    }

    #[test]
    fn heading_tag_at_column_zero_preserved() {
        let opts = FormatOptions {
            line_width: 30,
            ..Default::default()
        };
        let result = format_document("*intro* Introduction\n", &opts);
        assert_eq!(result, "*intro* Introduction\n");
    }

    #[test]
    fn preserves_code_fence_with_language() {
        let input = "prose\n>lua\n code()\n<\nafter\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn prose_not_merged_into_code_fence() {
        let input = "This is prose.\n>lua\n code()\n<\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn heading_tag_fallback_when_line_too_long() {
        let opts = FormatOptions {
            line_width: 20,
            ..Default::default()
        };
        let result = format_document("A very long heading *tag*\n", &opts);
        assert_eq!(result, "A very long heading *tag*\n");
    }

    #[test]
    fn list_items_not_merged() {
        let input = "- item 1\n- item 2\n- item 3\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn list_item_not_merged_with_preceding_prose() {
        let input = "Prose intro.\n- Item.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn asterisk_list_item_preserved() {
        let input = "* item text\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn tab_command_ref_preserved() {
        let input = "CTRL-V\t\tInsert next non-digit literally.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn tab_line_not_merged_with_adjacent_prose() {
        let input = "Prose before.\nCTRL-V\t\tDescription.\nProse after.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn tab_idempotent() {
        let input = "CTRL-V\t\tInsert next non-digit literally.\n\t\tcontinuation line.\n";
        let once = format_document(input, &FormatOptions::default());
        let twice = format_document(&once, &FormatOptions::default());
        assert_eq!(once, twice);
    }

    #[test]
    fn ordered_list_items_not_merged() {
        let input = "1. First item\n2. Second item\n3. Third item\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn ordered_list_not_merged_with_prose() {
        let input = "Intro text.\n1. First item\n2. Second item\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn double_space_after_period_preserved() {
        // Two spaces after the period.
        let input = "First sentence.\x20\x20Second sentence.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn double_space_preserved_during_reflow() {
        // Two spaces after the first sentence.
        let input = "The quick brown fox.\x20\x20The lazy dog sat.\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn line_break_joins_with_single_space() {
        let input = "word1 word2\nword3 word4";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, "word1 word2 word3 word4");
    }

    #[test]
    fn multi_space_internal_preserved() {
        // Three spaces between the first word and the quotation.
        let input = "Vi\x20\x20\x20\"the original\".\n";
        let result = format_document(input, &FormatOptions::default());
        assert_eq!(result, input);
    }

    #[test]
    fn reflow_never_preserves_line_breaks() {
        let input = "word1 word2\nword3 word4";
        let opts = FormatOptions {
            reflow: ReflowMode::Never,
            ..Default::default()
        };
        let result = format_document(input, &opts);
        assert_eq!(result, input);
    }

    #[test]
    fn reflow_only_if_too_long_skips_short_paragraph() {
        let input = "Short line.\nAnother short line.\n";
        let opts = FormatOptions {
            reflow: ReflowMode::OnlyIfTooLong,
            ..Default::default()
        };
        let result = format_document(input, &opts);
        assert_eq!(result, input);
    }

    #[test]
    fn reflow_only_if_too_long_reflows_overlong_paragraph() {
        let input = format!("{}\n", "word ".repeat(20).trim_end());
        let opts = FormatOptions {
            reflow: ReflowMode::OnlyIfTooLong,
            ..Default::default()
        };
        let result = format_document(&input, &opts);
        assert_ne!(result, input);
        assert!(result.lines().all(|l| l.len() <= 78));
    }

    #[test]
    fn normalize_spacing_collapses_double_space() {
        // Two spaces in, one space out.
        let input = "First sentence.\x20\x20Second sentence.\n";
        let opts = FormatOptions {
            normalize_spacing: true,
            ..Default::default()
        };
        let result = format_document(input, &opts);
        assert_eq!(result, "First sentence. Second sentence.\n");
    }
}