1use std::io::{self, Read, Write};
2
3pub struct FmtConfig {
5 pub width: usize,
7 pub goal: usize,
9 pub split_only: bool,
11 pub crown_margin: bool,
13 pub tagged: bool,
15 pub uniform_spacing: bool,
17 pub prefix: Option<String>,
19}
20
21impl Default for FmtConfig {
22 fn default() -> Self {
23 let width = 75;
24 Self {
25 width,
26 goal: (width * 187) / 200,
27 split_only: false,
28 crown_margin: false,
29 tagged: false,
30 uniform_spacing: false,
31 prefix: None,
32 }
33 }
34}
35
36pub fn fmt_file<R: Read, W: Write>(
42 mut input: R,
43 output: &mut W,
44 config: &FmtConfig,
45) -> io::Result<()> {
46 let mut data = Vec::new();
48 input.read_to_end(&mut data)?;
49 fmt_data(&data, output, config)
50}
51
52pub fn fmt_data(data: &[u8], output: &mut impl Write, config: &FmtConfig) -> io::Result<()> {
54 let text = match std::str::from_utf8(data) {
56 Ok(s) => s,
57 Err(_) => {
58 let owned = String::from_utf8_lossy(data);
60 return fmt_str_owned(&owned, output, config);
61 }
62 };
63 fmt_str(text, output, config)
64}
65
66fn fmt_str(text: &str, output: &mut impl Write, config: &FmtConfig) -> io::Result<()> {
68 let prefix_str = config.prefix.as_deref();
69 let mut para_start = 0;
70 let bytes = text.as_bytes();
71
72 let mut i = 0;
74 let _para_lines_start = 0; while i < bytes.len() {
77 let line_end = memchr::memchr(b'\n', &bytes[i..])
79 .map(|p| i + p)
80 .unwrap_or(bytes.len());
81
82 let line = &text[i..line_end];
83
84 let line = line.strip_suffix('\r').unwrap_or(line);
86
87 if let Some(pfx) = prefix_str {
89 if !line.starts_with(pfx) {
90 if para_start < i {
92 format_paragraph_str(text, para_start, i, config, output)?;
93 }
94 para_start = if line_end < bytes.len() {
95 line_end + 1
96 } else {
97 bytes.len()
98 };
99 output.write_all(line.as_bytes())?;
101 output.write_all(b"\n")?;
102 i = para_start;
103 continue;
104 }
105 }
106
107 if line.trim().is_empty() {
108 if para_start < i {
110 format_paragraph_str(text, para_start, i, config, output)?;
111 }
112 output.write_all(b"\n")?;
113 para_start = if line_end < bytes.len() {
114 line_end + 1
115 } else {
116 bytes.len()
117 };
118 }
119
120 i = if line_end < bytes.len() {
121 line_end + 1
122 } else {
123 bytes.len()
124 };
125 }
126
127 if para_start < bytes.len() {
129 let remaining = text[para_start..].trim_end_matches('\n');
130 if !remaining.is_empty() {
131 format_paragraph_str(text, para_start, bytes.len(), config, output)?;
132 }
133 }
134
135 Ok(())
136}
137
138fn fmt_str_owned(text: &str, output: &mut impl Write, config: &FmtConfig) -> io::Result<()> {
140 fmt_str(text, output, config)
141}
142
143fn format_paragraph_str(
146 text: &str,
147 start: usize,
148 end: usize,
149 config: &FmtConfig,
150 output: &mut impl Write,
151) -> io::Result<()> {
152 let region = &text[start..end];
153 let lines: Vec<&str> = region
155 .split('\n')
156 .map(|l| l.strip_suffix('\r').unwrap_or(l))
157 .filter(|l| !l.is_empty())
158 .collect();
159
160 if lines.is_empty() {
161 return Ok(());
162 }
163
164 let prefix_str = config.prefix.as_deref();
165
166 let stripped_first = match prefix_str {
168 Some(pfx) => lines[0].strip_prefix(pfx).unwrap_or(lines[0]),
169 None => lines[0],
170 };
171
172 let stripped_second: &str = if lines.len() > 1 {
173 match prefix_str {
174 Some(pfx) => lines[1].strip_prefix(pfx).unwrap_or(lines[1]),
175 None => lines[1],
176 }
177 } else {
178 stripped_first
179 };
180
181 let first_indent = leading_indent(stripped_first);
182 let rest_indent = leading_indent(stripped_second);
183
184 let (first_line_indent, cont_indent) = if config.tagged || config.crown_margin {
185 (first_indent, rest_indent)
186 } else {
187 (first_indent, first_indent)
188 };
189
190 if config.split_only {
191 for line in &lines {
192 split_long_line(line, config, prefix_str, output)?;
193 }
194 return Ok(());
195 }
196
197 let total_chars: usize = lines.iter().map(|l| l.len()).sum();
201 let mut all_words: Vec<&str> = Vec::with_capacity(total_chars / 5 + 16);
202 let mut sentence_ends: Vec<bool> = Vec::with_capacity(total_chars / 5 + 16);
203
204 for line in &lines {
205 let s = match prefix_str {
206 Some(pfx) => line.strip_prefix(pfx).unwrap_or(line),
207 None => line,
208 };
209 collect_words_with_sentence_info(s, &mut all_words, &mut sentence_ends);
210 }
211
212 if all_words.is_empty() {
213 output.write_all(b"\n")?;
214 return Ok(());
215 }
216
217 let pfx = prefix_str.unwrap_or("");
218 reflow_paragraph(
219 &all_words,
220 &sentence_ends,
221 pfx,
222 first_line_indent,
223 cont_indent,
224 config,
225 output,
226 )
227}
228
229fn leading_indent(line: &str) -> &str {
231 let trimmed = line.trim_start();
232 &line[..line.len() - trimmed.len()]
233}
234
235fn has_sentence_ending_punct(word: &str) -> bool {
238 let bytes = word.as_bytes();
239 let mut i = bytes.len();
241 while i > 0 && matches!(bytes[i - 1], b'"' | b'\'' | b')' | b']') {
242 i -= 1;
243 }
244 i > 0 && matches!(bytes[i - 1], b'.' | b'!' | b'?')
245}
246
247fn is_sentence_end_contextual(word: &str, followed_by_double_space_or_eol: bool) -> bool {
253 if !has_sentence_ending_punct(word) {
254 return false;
255 }
256 if !followed_by_double_space_or_eol {
257 return false;
258 }
259 let bytes = word.as_bytes();
261 let mut end = bytes.len();
262 while end > 0
263 && matches!(
264 bytes[end - 1],
265 b'.' | b'!' | b'?' | b'"' | b'\'' | b')' | b']'
266 )
267 {
268 end -= 1;
269 }
270 if end == 1 && bytes[0].is_ascii_uppercase() {
272 return false;
273 }
274 end > 0
276}
277
278const SENT_FLAG: u32 = 1 << 16; const PERIOD_FLAG: u32 = 1 << 17; const PUNCT_FLAG: u32 = 1 << 18; const PAREN_FLAG: u32 = 1 << 19; fn has_non_period_punct(word: &str) -> bool {
287 let bytes = word.as_bytes();
288 let mut i = bytes.len();
289 while i > 0 && matches!(bytes[i - 1], b'"' | b'\'' | b')' | b']') {
291 i -= 1;
292 }
293 i > 0 && matches!(bytes[i - 1], b',' | b';' | b':')
294}
295
296fn collect_words_with_sentence_info<'a>(
305 line: &'a str,
306 words: &mut Vec<&'a str>,
307 sentence_ends: &mut Vec<bool>,
308) {
309 let bytes = line.as_bytes();
310 let len = bytes.len();
311 let mut i = 0;
312
313 while i < len && bytes[i].is_ascii_whitespace() {
315 i += 1;
316 }
317
318 while i < len {
319 let word_start = i;
321 while i < len && !bytes[i].is_ascii_whitespace() {
322 i += 1;
323 }
324 let word = &line[word_start..i];
325
326 let space_start = i;
328 while i < len && bytes[i].is_ascii_whitespace() {
329 i += 1;
330 }
331 let space_count = i - space_start;
332
333 let at_eol = i >= len;
335 let double_space = space_count >= 2;
336
337 let is_sent_end = is_sentence_end_contextual(word, at_eol || double_space);
338
339 words.push(word);
340 sentence_ends.push(is_sent_end);
341 }
342}
343
344fn reflow_paragraph<W: Write>(
351 words: &[&str],
352 sentence_ends: &[bool],
353 prefix: &str,
354 first_indent: &str,
355 cont_indent: &str,
356 config: &FmtConfig,
357 output: &mut W,
358) -> io::Result<()> {
359 if words.is_empty() {
360 return Ok(());
361 }
362
363 let n = words.len();
364 let first_base = prefix.len() + first_indent.len();
365 let cont_base = prefix.len() + cont_indent.len();
366 let goal = config.goal as i64;
367 let width = config.width;
368 debug_assert_eq!(sentence_ends.len(), words.len());
369
370 const SHORT_FACTOR: i64 = 100;
382 const RAGGED_FACTOR: i64 = 50;
383 const LINE_COST: i64 = 70 * 70;
384 const SENTENCE_BONUS: i64 = 50 * 50;
385 const NOBREAK_COST: i64 = 600 * 600;
386 const PUNCT_BONUS: i64 = 40 * 40;
387 const PAREN_BONUS: i64 = 40 * 40;
388
389 let winfo: Vec<u32> = words
392 .iter()
393 .enumerate()
394 .map(|(i, w)| {
395 debug_assert!(w.len() <= 0xFFFF, "word too long for winfo packing");
396 let len = w.len() as u32;
397 let mut flags = 0u32;
398 if sentence_ends.get(i).copied().unwrap_or(false) {
399 flags |= SENT_FLAG; }
401 if has_sentence_ending_punct(w) {
402 flags |= PERIOD_FLAG; }
404 if has_non_period_punct(w) {
405 flags |= PUNCT_FLAG; }
407 let bytes = w.as_bytes();
408 if !bytes.is_empty() && matches!(bytes[0], b'(' | b'[' | b'{') {
409 flags |= PAREN_FLAG; }
411 len | flags
412 })
413 .collect();
414
415 let mut dp_cost = vec![i64::MAX; n + 1];
417 let mut best = vec![0u32; n];
418 let mut line_len = vec![0i32; n + 1];
419 dp_cost[n] = 0;
420
421 let winfo_ptr = winfo.as_ptr();
427 let dp_cost_ptr = dp_cost.as_mut_ptr();
428 let best_ptr = best.as_mut_ptr();
429 let line_len_ptr = line_len.as_mut_ptr();
430
431 for i in (0..n).rev() {
432 let base = if i == 0 { first_base } else { cont_base };
433 let mut len = base + unsafe { (*winfo_ptr.add(i) & 0xFFFF) as usize };
434 let mut best_total = i64::MAX;
435 let mut best_j = i as u32;
436 let mut best_len = len as i32;
437
438 for j in i..n {
439 if j > i {
440 let sep = if unsafe { *winfo_ptr.add(j - 1) & SENT_FLAG != 0 } {
442 2
443 } else {
444 1
445 };
446 len += sep + unsafe { (*winfo_ptr.add(j) & 0xFFFF) as usize };
447 }
448
449 macro_rules! try_candidate {
451 () => {
452 let lc = if j == n - 1 {
453 0i64
454 } else {
455 let short_n = goal - len as i64;
457 let short_cost = short_n * short_n * SHORT_FACTOR;
458 let ragged_cost = if unsafe { *best_ptr.add(j + 1) as usize + 1 < n } {
459 let ragged_n = len as i64 - unsafe { *line_len_ptr.add(j + 1) } as i64;
460 ragged_n * ragged_n * RAGGED_FACTOR
461 } else {
462 0
463 };
464 short_cost + ragged_cost
465 };
466
467 let bc = if j == n - 1 {
470 0i64
471 } else {
472 let wj = unsafe { *winfo_ptr.add(j) };
473 let wj1 = unsafe { *winfo_ptr.add(j + 1) };
474 let mut cost = LINE_COST;
475
476 if wj & PERIOD_FLAG != 0 {
478 if wj & SENT_FLAG != 0 {
479 cost -= SENTENCE_BONUS;
480 } else {
481 cost += NOBREAK_COST;
482 }
483 } else if wj & PUNCT_FLAG != 0 {
484 cost -= PUNCT_BONUS;
485 } else if j > 0 {
486 let wjm1 = unsafe { *winfo_ptr.add(j - 1) };
488 if wjm1 & SENT_FLAG != 0 {
489 let word_len = (wj & 0xFFFF) as i64;
490 cost += 40000 / (word_len + 2);
491 }
492 }
493
494 if wj1 & PAREN_FLAG != 0 {
496 cost -= PAREN_BONUS;
497 } else if wj1 & SENT_FLAG != 0 {
498 let word_len = (wj1 & 0xFFFF) as i64;
499 cost += 22500 / (word_len + 2);
500 }
501
502 cost
503 };
504
505 let cj1 = unsafe { *dp_cost_ptr.add(j + 1) };
506 if cj1 != i64::MAX {
507 let total = lc + bc + cj1;
508 if total < best_total {
509 best_total = total;
510 best_j = j as u32;
511 best_len = len as i32;
512 }
513 }
514 };
515 }
516
517 if len > width {
518 if j == i {
519 try_candidate!();
520 }
521 break;
522 }
523
524 try_candidate!();
525 }
526
527 if best_total < i64::MAX {
528 unsafe {
529 *dp_cost_ptr.add(i) = best_total;
530 *best_ptr.add(i) = best_j;
531 *line_len_ptr.add(i) = best_len;
532 }
533 }
534 }
535
536 let mut i = 0;
538 let mut is_first_line = true;
539
540 while i < n {
541 let j = best[i] as usize;
542
543 output.write_all(prefix.as_bytes())?;
544 if is_first_line {
545 output.write_all(first_indent.as_bytes())?;
546 } else {
547 output.write_all(cont_indent.as_bytes())?;
548 }
549 output.write_all(words[i].as_bytes())?;
550
551 for k in (i + 1)..=j {
552 if sentence_ends[k - 1] {
555 output.write_all(b" ")?;
556 } else {
557 output.write_all(b" ")?;
558 }
559 output.write_all(words[k].as_bytes())?;
560 }
561 output.write_all(b"\n")?;
562
563 is_first_line = false;
564 i = j + 1;
565 }
566
567 Ok(())
568}
569
570fn split_long_line<W: Write>(
573 line: &str,
574 config: &FmtConfig,
575 prefix: Option<&str>,
576 output: &mut W,
577) -> io::Result<()> {
578 let stripped = match prefix {
579 Some(pfx) => line.strip_prefix(pfx).unwrap_or(line),
580 None => line,
581 };
582 let indent = leading_indent(stripped);
583 let pfx = prefix.unwrap_or("");
584
585 if line.len() <= config.width {
586 output.write_all(line.as_bytes())?;
587 output.write_all(b"\n")?;
588 return Ok(());
589 }
590
591 let s = match prefix {
592 Some(pfx) => line.strip_prefix(pfx).unwrap_or(line),
593 None => line,
594 };
595
596 let pfx_indent_len = pfx.len() + indent.len();
597 let mut cur_len = pfx_indent_len;
598 let mut first_word_on_line = true;
599
600 output.write_all(pfx.as_bytes())?;
602 output.write_all(indent.as_bytes())?;
603
604 for word in s.split_whitespace() {
607 if !first_word_on_line {
608 let new_len = cur_len + 1 + word.len();
609 if new_len > config.width {
610 output.write_all(b"\n")?;
611 output.write_all(pfx.as_bytes())?;
612 output.write_all(indent.as_bytes())?;
613 cur_len = pfx_indent_len;
614 first_word_on_line = true;
615 }
616 }
617
618 if !first_word_on_line {
619 output.write_all(b" ")?;
620 cur_len += 1;
621 }
622 output.write_all(word.as_bytes())?;
623 cur_len += word.len();
624 first_word_on_line = false;
625 }
626
627 if !first_word_on_line {
628 output.write_all(b"\n")?;
629 }
630
631 Ok(())
632}