1use std::io::{self, Read, Write};
2
3pub struct FmtConfig {
5 pub width: usize,
7 pub goal: usize,
9 pub split_only: bool,
11 pub crown_margin: bool,
13 pub tagged: bool,
15 pub uniform_spacing: bool,
17 pub prefix: Option<String>,
19}
20
21impl Default for FmtConfig {
22 fn default() -> Self {
23 let width = 75;
24 Self {
25 width,
26 goal: (width * 187) / 200,
27 split_only: false,
28 crown_margin: false,
29 tagged: false,
30 uniform_spacing: false,
31 prefix: None,
32 }
33 }
34}
35
36pub fn fmt_file<R: Read, W: Write>(
42 mut input: R,
43 output: &mut W,
44 config: &FmtConfig,
45) -> io::Result<()> {
46 let mut data = Vec::new();
48 input.read_to_end(&mut data)?;
49 fmt_data(&data, output, config)
50}
51
52pub fn fmt_data(data: &[u8], output: &mut impl Write, config: &FmtConfig) -> io::Result<()> {
54 let text = match std::str::from_utf8(data) {
56 Ok(s) => s,
57 Err(_) => {
58 let owned = String::from_utf8_lossy(data);
60 return fmt_str_owned(&owned, output, config);
61 }
62 };
63 fmt_str(text, output, config)
64}
65
66fn fmt_str(text: &str, output: &mut impl Write, config: &FmtConfig) -> io::Result<()> {
68 let prefix_str = config.prefix.as_deref();
69 let mut para_start = 0;
70 let bytes = text.as_bytes();
71
72 let mut i = 0;
74 let _para_lines_start = 0; while i < bytes.len() {
77 let line_end = memchr::memchr(b'\n', &bytes[i..])
79 .map(|p| i + p)
80 .unwrap_or(bytes.len());
81
82 let line = &text[i..line_end];
83
84 let line = line.strip_suffix('\r').unwrap_or(line);
86
87 if let Some(pfx) = prefix_str {
89 if !line.starts_with(pfx) {
90 if para_start < i {
92 format_paragraph_str(text, para_start, i, config, output)?;
93 }
94 para_start = if line_end < bytes.len() {
95 line_end + 1
96 } else {
97 bytes.len()
98 };
99 output.write_all(line.as_bytes())?;
101 output.write_all(b"\n")?;
102 i = para_start;
103 continue;
104 }
105 }
106
107 if line.trim().is_empty() {
108 if para_start < i {
110 format_paragraph_str(text, para_start, i, config, output)?;
111 }
112 output.write_all(b"\n")?;
113 para_start = if line_end < bytes.len() {
114 line_end + 1
115 } else {
116 bytes.len()
117 };
118 }
119
120 i = if line_end < bytes.len() {
121 line_end + 1
122 } else {
123 bytes.len()
124 };
125 }
126
127 if para_start < bytes.len() {
129 let remaining = text[para_start..].trim_end_matches('\n');
130 if !remaining.is_empty() {
131 format_paragraph_str(text, para_start, bytes.len(), config, output)?;
132 }
133 }
134
135 Ok(())
136}
137
138fn fmt_str_owned(text: &str, output: &mut impl Write, config: &FmtConfig) -> io::Result<()> {
140 fmt_str(text, output, config)
141}
142
143fn format_paragraph_str(
146 text: &str,
147 start: usize,
148 end: usize,
149 config: &FmtConfig,
150 output: &mut impl Write,
151) -> io::Result<()> {
152 let region = &text[start..end];
153 let lines: Vec<&str> = region
155 .split('\n')
156 .map(|l| l.strip_suffix('\r').unwrap_or(l))
157 .filter(|l| !l.is_empty())
158 .collect();
159
160 if lines.is_empty() {
161 return Ok(());
162 }
163
164 let prefix_str = config.prefix.as_deref();
165
166 let stripped_first = match prefix_str {
168 Some(pfx) => lines[0].strip_prefix(pfx).unwrap_or(lines[0]),
169 None => lines[0],
170 };
171
172 let stripped_second: &str = if lines.len() > 1 {
173 match prefix_str {
174 Some(pfx) => lines[1].strip_prefix(pfx).unwrap_or(lines[1]),
175 None => lines[1],
176 }
177 } else {
178 stripped_first
179 };
180
181 let first_indent = leading_indent(stripped_first);
182 let rest_indent = leading_indent(stripped_second);
183
184 let (first_line_indent, cont_indent) = if config.tagged || config.crown_margin {
185 (first_indent, rest_indent)
186 } else {
187 (first_indent, first_indent)
188 };
189
190 if config.split_only {
191 for line in &lines {
192 split_long_line(line, config, prefix_str, output)?;
193 }
194 return Ok(());
195 }
196
197 let total_chars: usize = lines.iter().map(|l| l.len()).sum();
199 let mut all_words: Vec<&str> = Vec::with_capacity(total_chars / 5 + 16);
200 for line in &lines {
201 let s = match prefix_str {
202 Some(pfx) => line.strip_prefix(pfx).unwrap_or(line),
203 None => line,
204 };
205 all_words.extend(s.split_whitespace());
206 }
207
208 if all_words.is_empty() {
209 output.write_all(b"\n")?;
210 return Ok(());
211 }
212
213 let pfx = prefix_str.unwrap_or("");
214 reflow_paragraph(
215 &all_words,
216 pfx,
217 first_line_indent,
218 cont_indent,
219 config,
220 output,
221 )
222}
223
224fn leading_indent(line: &str) -> &str {
226 let trimmed = line.trim_start();
227 &line[..line.len() - trimmed.len()]
228}
229
230fn is_sentence_end(word: &str) -> bool {
232 matches!(word.as_bytes().last(), Some(b'.' | b'!' | b'?'))
233}
234
235fn reflow_paragraph<W: Write>(
242 words: &[&str],
243 prefix: &str,
244 first_indent: &str,
245 cont_indent: &str,
246 config: &FmtConfig,
247 output: &mut W,
248) -> io::Result<()> {
249 if words.is_empty() {
250 return Ok(());
251 }
252
253 let n = words.len();
254 let first_base = prefix.len() + first_indent.len();
255 let cont_base = prefix.len() + cont_indent.len();
256 let goal = config.goal as i64;
257 let width = config.width;
258 let uniform = config.uniform_spacing;
259
260 const LINE_COST: i64 = 70 * 70;
261 const NOBREAK_COST: i64 = 600 * 600;
262 const SENTENCE_BONUS: i64 = 50 * 50;
263 const SENT_FLAG: u32 = 1 << 16;
264
265 let winfo: Vec<u32> = words
269 .iter()
270 .map(|w| {
271 let len = w.len() as u32;
272 let sent = if matches!(w.as_bytes().last(), Some(b'.' | b'!' | b'?')) {
273 SENT_FLAG
274 } else {
275 0
276 };
277 len | sent
278 })
279 .collect();
280
281 let mut dp_cost = vec![i64::MAX; n + 1];
283 let mut best = vec![0u32; n];
284 let mut line_len = vec![0i32; n + 1];
285 dp_cost[n] = 0;
286
287 let winfo_ptr = winfo.as_ptr();
293 let dp_cost_ptr = dp_cost.as_mut_ptr();
294 let best_ptr = best.as_mut_ptr();
295 let line_len_ptr = line_len.as_mut_ptr();
296
297 for i in (0..n).rev() {
298 let base = if i == 0 { first_base } else { cont_base };
299 let mut len = base + unsafe { (*winfo_ptr.add(i) & 0xFFFF) as usize };
300 let mut best_total = i64::MAX;
301 let mut best_j = i as u32;
302 let mut best_len = len as i32;
303
304 for j in i..n {
305 if j > i {
306 let sep = if uniform && unsafe { *winfo_ptr.add(j - 1) & SENT_FLAG != 0 } {
307 2
308 } else {
309 1
310 };
311 len += sep + unsafe { (*winfo_ptr.add(j) & 0xFFFF) as usize };
312 }
313
314 if len > width {
315 if j == i {
316 let lc = if j == n - 1 {
317 0i64
318 } else {
319 let bc = if unsafe { *winfo_ptr.add(j) & SENT_FLAG != 0 } {
320 if uniform {
321 LINE_COST - SENTENCE_BONUS
322 } else {
323 LINE_COST + NOBREAK_COST
324 }
325 } else {
326 LINE_COST
327 };
328 let short_n = goal - len as i64;
329 let short_cost = short_n * 10 * short_n * 10;
330 let ragged_cost = if unsafe { *best_ptr.add(j + 1) as usize + 1 < n } {
331 let ragged_n = len as i64 - unsafe { *line_len_ptr.add(j + 1) } as i64;
332 ragged_n * 10 * ragged_n * 10 / 2
333 } else {
334 0
335 };
336 bc + short_cost + ragged_cost
337 };
338 let cj1 = unsafe { *dp_cost_ptr.add(j + 1) };
339 if cj1 != i64::MAX {
340 let total = lc + cj1;
341 if total < best_total {
342 best_total = total;
343 best_j = j as u32;
344 best_len = len as i32;
345 }
346 }
347 }
348 break;
349 }
350
351 let lc = if j == n - 1 {
352 0i64
353 } else {
354 let bc = if unsafe { *winfo_ptr.add(j) & SENT_FLAG != 0 } {
355 if uniform {
356 LINE_COST - SENTENCE_BONUS
357 } else {
358 LINE_COST + NOBREAK_COST
359 }
360 } else {
361 LINE_COST
362 };
363 let short_n = goal - len as i64;
364 let short_cost = short_n * 10 * short_n * 10;
365 let ragged_cost = if unsafe { *best_ptr.add(j + 1) as usize + 1 < n } {
366 let ragged_n = len as i64 - unsafe { *line_len_ptr.add(j + 1) } as i64;
367 ragged_n * 10 * ragged_n * 10 / 2
368 } else {
369 0
370 };
371 bc + short_cost + ragged_cost
372 };
373
374 let cj1 = unsafe { *dp_cost_ptr.add(j + 1) };
375 if cj1 != i64::MAX {
376 let total = lc + cj1;
377 if total < best_total {
378 best_total = total;
379 best_j = j as u32;
380 best_len = len as i32;
381 }
382 }
383 }
384
385 if best_total < i64::MAX {
386 unsafe {
387 *dp_cost_ptr.add(i) = best_total;
388 *best_ptr.add(i) = best_j;
389 *line_len_ptr.add(i) = best_len;
390 }
391 }
392 }
393
394 let mut i = 0;
396 let mut is_first_line = true;
397
398 while i < n {
399 let j = best[i] as usize;
400
401 output.write_all(prefix.as_bytes())?;
402 if is_first_line {
403 output.write_all(first_indent.as_bytes())?;
404 } else {
405 output.write_all(cont_indent.as_bytes())?;
406 }
407 output.write_all(words[i].as_bytes())?;
408
409 for k in (i + 1)..=j {
410 if uniform && is_sentence_end(words[k - 1]) {
411 output.write_all(b" ")?;
412 } else {
413 output.write_all(b" ")?;
414 }
415 output.write_all(words[k].as_bytes())?;
416 }
417 output.write_all(b"\n")?;
418
419 is_first_line = false;
420 i = j + 1;
421 }
422
423 Ok(())
424}
425
426fn split_long_line<W: Write>(
429 line: &str,
430 config: &FmtConfig,
431 prefix: Option<&str>,
432 output: &mut W,
433) -> io::Result<()> {
434 let stripped = match prefix {
435 Some(pfx) => line.strip_prefix(pfx).unwrap_or(line),
436 None => line,
437 };
438 let indent = leading_indent(stripped);
439 let pfx = prefix.unwrap_or("");
440
441 if line.len() <= config.width {
442 output.write_all(line.as_bytes())?;
443 output.write_all(b"\n")?;
444 return Ok(());
445 }
446
447 let s = match prefix {
448 Some(pfx) => line.strip_prefix(pfx).unwrap_or(line),
449 None => line,
450 };
451
452 let pfx_indent_len = pfx.len() + indent.len();
453 let mut cur_len = pfx_indent_len;
454 let mut first_word_on_line = true;
455
456 output.write_all(pfx.as_bytes())?;
458 output.write_all(indent.as_bytes())?;
459
460 for (i, word) in s.split_whitespace().enumerate() {
461 let sep_len = if first_word_on_line {
462 0
463 } else if config.uniform_spacing && i > 0 {
464 1
467 } else {
468 1
469 };
470
471 if !first_word_on_line && cur_len + sep_len + word.len() > config.width {
472 output.write_all(b"\n")?;
473 output.write_all(pfx.as_bytes())?;
474 output.write_all(indent.as_bytes())?;
475 cur_len = pfx_indent_len;
476 first_word_on_line = true;
477 }
478
479 if !first_word_on_line {
480 output.write_all(b" ")?;
481 cur_len += 1;
482 }
483 output.write_all(word.as_bytes())?;
484 cur_len += word.len();
485 first_word_on_line = false;
486 }
487
488 if !first_word_on_line {
489 output.write_all(b"\n")?;
490 }
491
492 Ok(())
493}