uu_fmt/
fmt.rs

1// This file is part of the uutils coreutils package.
2//
3// For the full copyright and license information, please view the LICENSE
4// file that was distributed with this source code.
5
6// spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix
7
8use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
9use std::fs::File;
10use std::io::{stdin, stdout, BufReader, BufWriter, Read, Stdout, Write};
11use uucore::display::Quotable;
12use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
13use uucore::{format_usage, help_about, help_usage};
14
15use linebreak::break_lines;
16use parasplit::ParagraphStream;
17
18mod linebreak;
19mod parasplit;
20
// Help text and usage line, both pulled from `fmt.md`.
const ABOUT: &str = help_about!("fmt.md");
const USAGE: &str = help_usage!("fmt.md");
// Largest WIDTH accepted; anything above is rejected as out of range.
const MAX_WIDTH: usize = 2500;
// Goal and width used when neither `--goal` nor `--width` is given.
const DEFAULT_GOAL: usize = 70;
const DEFAULT_WIDTH: usize = 75;
// By default, goal is 93% of width (expressed as a percentage so that the
// computation can stay in integer arithmetic).
const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93;
28
/// Identifiers of the command-line arguments.
///
/// Each constant is used as the `clap` argument id when the command is built
/// and when the parsed matches are queried.
mod options {
    pub const CROWN_MARGIN: &str = "crown-margin";
    pub const TAGGED_PARAGRAPH: &str = "tagged-paragraph";
    pub const PRESERVE_HEADERS: &str = "preserve-headers";
    pub const SPLIT_ONLY: &str = "split-only";
    pub const UNIFORM_SPACING: &str = "uniform-spacing";
    pub const PREFIX: &str = "prefix";
    pub const SKIP_PREFIX: &str = "skip-prefix";
    pub const EXACT_PREFIX: &str = "exact-prefix";
    pub const EXACT_SKIP_PREFIX: &str = "exact-skip-prefix";
    pub const WIDTH: &str = "width";
    pub const GOAL: &str = "goal";
    pub const QUICK: &str = "quick";
    pub const TAB_WIDTH: &str = "tab-width";
    /// Positional arguments: the input files, where the very first one may
    /// instead be a `-WIDTH` shorthand (a negative number).
    pub const FILES_OR_WIDTH: &str = "files";
}
45
/// Buffered reader over a boxed input source; `process_file` fills it with
/// either standard input or an opened file.
pub type FileOrStdReader = BufReader<Box<dyn Read + 'static>>;
47
/// Formatting options resolved from the command line.
///
/// Instances are built by `FmtOptions::from_matches`, which also validates
/// the numeric settings.
pub struct FmtOptions {
    /// Crown-margin mode (`-c`); turned off when `split_only` is set.
    crown: bool,
    /// Tagged-paragraph mode (`-t`); turned off when `crown` or `split_only` is set.
    tagged: bool,
    /// Try to detect and preserve mail headers (`-m`).
    mail: bool,
    /// Only split long lines, never reflow (`-s`).
    split_only: bool,
    /// Only lines starting with this prefix are reformatted (`-p`).
    prefix: Option<String>,
    /// Whether `prefix` must match with no preceding whitespace (`-x`).
    xprefix: bool,
    /// Lines starting with this prefix are left untouched (`-P`).
    anti_prefix: Option<String>,
    /// Whether `anti_prefix` must match with no preceding whitespace (`-X`).
    xanti_prefix: bool,
    /// Normalise spacing between words and sentences (`-u`).
    uniform: bool,
    /// Prefer speed over evenly filled lines when breaking (`-q`).
    quick: bool,
    /// Maximum output line width; never larger than `MAX_WIDTH`.
    width: usize,
    /// Preferred line width; never larger than `width`.
    goal: usize,
    /// Number of columns a tab counts for when measuring lines; at least 1.
    tabwidth: usize,
}
63
64impl FmtOptions {
65    fn from_matches(matches: &ArgMatches) -> UResult<Self> {
66        let mut tagged = matches.get_flag(options::TAGGED_PARAGRAPH);
67        let mut crown = matches.get_flag(options::CROWN_MARGIN);
68
69        let mail = matches.get_flag(options::PRESERVE_HEADERS);
70        let uniform = matches.get_flag(options::UNIFORM_SPACING);
71        let quick = matches.get_flag(options::QUICK);
72        let split_only = matches.get_flag(options::SPLIT_ONLY);
73
74        if crown {
75            tagged = false;
76        }
77        if split_only {
78            crown = false;
79            tagged = false;
80        }
81
82        let xprefix = matches.contains_id(options::EXACT_PREFIX);
83        let xanti_prefix = matches.contains_id(options::SKIP_PREFIX);
84
85        let prefix = matches.get_one::<String>(options::PREFIX).map(String::from);
86        let anti_prefix = matches
87            .get_one::<String>(options::SKIP_PREFIX)
88            .map(String::from);
89
90        let width_opt = extract_width(matches)?;
91        let goal_opt_str = matches.get_one::<String>(options::GOAL);
92        let goal_opt = if let Some(goal_str) = goal_opt_str {
93            match goal_str.parse::<usize>() {
94                Ok(goal) => Some(goal),
95                Err(_) => {
96                    return Err(USimpleError::new(
97                        1,
98                        format!("invalid goal: {}", goal_str.quote()),
99                    ));
100                }
101            }
102        } else {
103            None
104        };
105
106        let (width, goal) = match (width_opt, goal_opt) {
107            (Some(w), Some(g)) => {
108                if g > w {
109                    return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH."));
110                }
111                (w, g)
112            }
113            (Some(w), None) => {
114                // Only allow a goal of zero if the width is set to be zero
115                let g = (w * DEFAULT_GOAL_TO_WIDTH_RATIO / 100).max(if w == 0 { 0 } else { 1 });
116                (w, g)
117            }
118            (None, Some(g)) => {
119                if g > DEFAULT_WIDTH {
120                    return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH."));
121                }
122                let w = (g * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO).max(g + 3);
123                (w, g)
124            }
125            (None, None) => (DEFAULT_WIDTH, DEFAULT_GOAL),
126        };
127        debug_assert!(width >= goal, "GOAL {goal} should not be greater than WIDTH {width} when given {width_opt:?} and {goal_opt:?}.");
128
129        if width > MAX_WIDTH {
130            return Err(USimpleError::new(
131                1,
132                format!("invalid width: '{width}': Numerical result out of range"),
133            ));
134        }
135
136        let mut tabwidth = 8;
137        if let Some(s) = matches.get_one::<String>(options::TAB_WIDTH) {
138            tabwidth = match s.parse::<usize>() {
139                Ok(t) => t,
140                Err(e) => {
141                    return Err(USimpleError::new(
142                        1,
143                        format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e),
144                    ));
145                }
146            };
147        };
148
149        if tabwidth < 1 {
150            tabwidth = 1;
151        }
152
153        Ok(Self {
154            crown,
155            tagged,
156            mail,
157            split_only,
158            prefix,
159            xprefix,
160            anti_prefix,
161            xanti_prefix,
162            uniform,
163            quick,
164            width,
165            goal,
166            tabwidth,
167        })
168    }
169}
170
171/// Process the content of a file and format it according to the provided options.
172///
173/// # Arguments
174///
175/// * `file_name` - The name of the file to process. A value of "-" represents the standard input.
176/// * `fmt_opts` - A reference to a `FmtOptions` struct containing the formatting options.
177/// * `ostream` - A mutable reference to a `BufWriter` wrapping the standard output.
178///
179/// # Returns
180///
181/// A `UResult<()>` indicating success or failure.
182fn process_file(
183    file_name: &str,
184    fmt_opts: &FmtOptions,
185    ostream: &mut BufWriter<Stdout>,
186) -> UResult<()> {
187    let mut fp = BufReader::new(match file_name {
188        "-" => Box::new(stdin()) as Box<dyn Read + 'static>,
189        _ => {
190            let f = File::open(file_name)
191                .map_err_context(|| format!("cannot open {} for reading", file_name.quote()))?;
192            if f.metadata()
193                .map_err_context(|| format!("cannot get metadata for {}", file_name.quote()))?
194                .is_dir()
195            {
196                return Err(USimpleError::new(1, "read error".to_string()));
197            }
198
199            Box::new(f) as Box<dyn Read + 'static>
200        }
201    });
202
203    let p_stream = ParagraphStream::new(fmt_opts, &mut fp);
204    for para_result in p_stream {
205        match para_result {
206            Err(s) => {
207                ostream
208                    .write_all(s.as_bytes())
209                    .map_err_context(|| "failed to write output".to_string())?;
210                ostream
211                    .write_all(b"\n")
212                    .map_err_context(|| "failed to write output".to_string())?;
213            }
214            Ok(para) => break_lines(&para, fmt_opts, ostream)
215                .map_err_context(|| "failed to write output".to_string())?,
216        }
217    }
218
219    // flush the output after each file
220    ostream
221        .flush()
222        .map_err_context(|| "failed to write output".to_string())?;
223
224    Ok(())
225}
226
227/// Extract the file names from the positional arguments, ignoring any negative width in the first
228/// position.
229///
230/// # Returns
231/// A `UResult<()>` with the file names, or an error if one of the file names could not be parsed
232/// (e.g., it is given as a negative number not in the first argument and not after a --
233fn extract_files(matches: &ArgMatches) -> UResult<Vec<String>> {
234    let in_first_pos = matches
235        .index_of(options::FILES_OR_WIDTH)
236        .is_some_and(|x| x == 1);
237    let is_neg = |s: &str| s.parse::<isize>().is_ok_and(|w| w < 0);
238
239    let files: UResult<Vec<String>> = matches
240        .get_many::<String>(options::FILES_OR_WIDTH)
241        .into_iter()
242        .flatten()
243        .enumerate()
244        .filter_map(|(i, x)| {
245            if is_neg(x) {
246                if in_first_pos && i == 0 {
247                    None
248                } else {
249                    let first_num = x.chars().nth(1).expect("a negative number should be at least two characters long");
250                    Some(Err(
251                        UUsageError::new(1, format!("invalid option -- {first_num}; -WIDTH is recognized only when it is the first\noption; use -w N instead"))
252                    ))
253                }
254            } else {
255                Some(Ok(x.clone()))
256            }
257        })
258        .collect();
259
260    if files.as_ref().is_ok_and(|f| f.is_empty()) {
261        Ok(vec!["-".into()])
262    } else {
263        files
264    }
265}
266
267fn extract_width(matches: &ArgMatches) -> UResult<Option<usize>> {
268    let width_opt = matches.get_one::<String>(options::WIDTH);
269    if let Some(width_str) = width_opt {
270        if let Ok(width) = width_str.parse::<usize>() {
271            return Ok(Some(width));
272        } else {
273            return Err(USimpleError::new(
274                1,
275                format!("invalid width: {}", width_str.quote()),
276            ));
277        }
278    }
279
280    if let Some(1) = matches.index_of(options::FILES_OR_WIDTH) {
281        let width_arg = matches.get_one::<String>(options::FILES_OR_WIDTH).unwrap();
282        if let Some(num) = width_arg.strip_prefix('-') {
283            Ok(num.parse::<usize>().ok())
284        } else {
285            // will be treated as a file name
286            Ok(None)
287        }
288    } else {
289        Ok(None)
290    }
291}
292
293#[uucore::main]
294pub fn uumain(args: impl uucore::Args) -> UResult<()> {
295    let args: Vec<_> = args.collect();
296
297    // Warn the user if it looks like we're trying to pass a number in the first
298    // argument with non-numeric characters
299    if let Some(first_arg) = args.get(1) {
300        let first_arg = first_arg.to_string_lossy();
301        let malformed_number = first_arg.starts_with('-')
302            && first_arg.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
303            && first_arg.chars().skip(2).any(|c| !c.is_ascii_digit());
304        if malformed_number {
305            return Err(USimpleError::new(
306                1,
307                format!(
308                    "invalid width: {}",
309                    first_arg.strip_prefix('-').unwrap().quote()
310                ),
311            ));
312        }
313    }
314
315    let matches = uu_app().try_get_matches_from(&args)?;
316
317    let files = extract_files(&matches)?;
318
319    let fmt_opts = FmtOptions::from_matches(&matches)?;
320
321    let mut ostream = BufWriter::new(stdout());
322
323    for file_name in &files {
324        process_file(file_name, &fmt_opts, &mut ostream)?;
325    }
326
327    Ok(())
328}
329
330pub fn uu_app() -> Command {
331    Command::new(uucore::util_name())
332        .version(crate_version!())
333        .about(ABOUT)
334        .override_usage(format_usage(USAGE))
335        .infer_long_args(true)
336        .args_override_self(true)
337        .arg(
338            Arg::new(options::CROWN_MARGIN)
339                .short('c')
340                .long(options::CROWN_MARGIN)
341                .help(
342                    "First and second line of paragraph \
343                    may have different indentations, in which \
344                    case the first line's indentation is preserved, \
345                    and each subsequent line's indentation matches the second line.",
346                )
347                .action(ArgAction::SetTrue),
348        )
349        .arg(
350            Arg::new(options::TAGGED_PARAGRAPH)
351                .short('t')
352                .long("tagged-paragraph")
353                .help(
354                    "Like -c, except that the first and second line of a paragraph *must* \
355                    have different indentation or they are treated as separate paragraphs.",
356                )
357                .action(ArgAction::SetTrue),
358        )
359        .arg(
360            Arg::new(options::PRESERVE_HEADERS)
361                .short('m')
362                .long("preserve-headers")
363                .help(
364                    "Attempt to detect and preserve mail headers in the input. \
365                    Be careful when combining this flag with -p.",
366                )
367                .action(ArgAction::SetTrue),
368        )
369        .arg(
370            Arg::new(options::SPLIT_ONLY)
371                .short('s')
372                .long("split-only")
373                .help("Split lines only, do not reflow.")
374                .action(ArgAction::SetTrue),
375        )
376        .arg(
377            Arg::new(options::UNIFORM_SPACING)
378                .short('u')
379                .long("uniform-spacing")
380                .help(
381                    "Insert exactly one \
382                    space between words, and two between sentences. \
383                    Sentence breaks in the input are detected as [?!.] \
384                    followed by two spaces or a newline; other punctuation \
385                    is not interpreted as a sentence break.",
386                )
387                .action(ArgAction::SetTrue),
388        )
389        .arg(
390            Arg::new(options::PREFIX)
391                .short('p')
392                .long("prefix")
393                .help(
394                    "Reformat only lines \
395                    beginning with PREFIX, reattaching PREFIX to reformatted lines. \
396                    Unless -x is specified, leading whitespace will be ignored \
397                    when matching PREFIX.",
398                )
399                .value_name("PREFIX"),
400        )
401        .arg(
402            Arg::new(options::SKIP_PREFIX)
403                .short('P')
404                .long("skip-prefix")
405                .help(
406                    "Do not reformat lines \
407                    beginning with PSKIP. Unless -X is specified, leading whitespace \
408                    will be ignored when matching PSKIP",
409                )
410                .value_name("PSKIP"),
411        )
412        .arg(
413            Arg::new(options::EXACT_PREFIX)
414                .short('x')
415                .long("exact-prefix")
416                .help(
417                    "PREFIX must match at the \
418                    beginning of the line with no preceding whitespace.",
419                )
420                .action(ArgAction::SetTrue),
421        )
422        .arg(
423            Arg::new(options::EXACT_SKIP_PREFIX)
424                .short('X')
425                .long("exact-skip-prefix")
426                .help(
427                    "PSKIP must match at the \
428                    beginning of the line with no preceding whitespace.",
429                )
430                .action(ArgAction::SetTrue),
431        )
432        .arg(
433            Arg::new(options::WIDTH)
434                .short('w')
435                .long("width")
436                .help("Fill output lines up to a maximum of WIDTH columns, default 75. This can be specified as a negative number in the first argument.")
437                // We must accept invalid values if they are overridden later. This is not supported by clap, so accept all strings instead.
438                .value_name("WIDTH"),
439        )
440        .arg(
441            Arg::new(options::GOAL)
442                .short('g')
443                .long("goal")
444                .help("Goal width, default of 93% of WIDTH. Must be less than or equal to WIDTH.")
445                // We must accept invalid values if they are overridden later. This is not supported by clap, so accept all strings instead.
446                .value_name("GOAL"),
447        )
448        .arg(
449            Arg::new(options::QUICK)
450                .short('q')
451                .long("quick")
452                .help(
453                    "Break lines more quickly at the \
454            expense of a potentially more ragged appearance.",
455                )
456                .action(ArgAction::SetTrue),
457        )
458        .arg(
459            Arg::new(options::TAB_WIDTH)
460                .short('T')
461                .long("tab-width")
462                .help(
463                    "Treat tabs as TABWIDTH spaces for \
464                    determining line length, default 8. Note that this is used only for \
465                    calculating line lengths; tabs are preserved in the output.",
466                )
467                .value_name("TABWIDTH"),
468        )
469        .arg(
470            Arg::new(options::FILES_OR_WIDTH)
471                .action(ArgAction::Append)
472                .value_name("FILES")
473                .value_hint(clap::ValueHint::FilePath)
474                .allow_negative_numbers(true),
475        )
476}
477
#[cfg(test)]
mod tests {
    use crate::uu_app;
    use crate::{extract_files, extract_width};

    /// Parses `args` (program name included) and returns the extracted file
    /// list together with the outcome of the width extraction.
    fn files_and_width(args: &[&str]) -> (Vec<String>, Option<Option<usize>>) {
        let matches = uu_app()
            .try_get_matches_from(args.iter().copied())
            .unwrap();
        (
            extract_files(&matches).unwrap(),
            extract_width(&matches).ok(),
        )
    }

    // A leading negative number is the width, not a file name.
    #[test]
    fn parse_negative_width() {
        let (files, width) = files_and_width(&["fmt", "-3", "some-file"]);

        assert_eq!(files, vec!["some-file"]);
        assert_eq!(width, Some(Some(3)));
    }

    // The width can be given through `-w`.
    #[test]
    fn parse_width_as_arg() {
        let (files, width) = files_and_width(&["fmt", "-w3", "some-file"]);

        assert_eq!(files, vec!["some-file"]);
        assert_eq!(width, Some(Some(3)));
    }

    // Without arguments, the input defaults to "-" and no width is set.
    #[test]
    fn parse_no_args() {
        let (files, width) = files_and_width(&["fmt"]);

        assert_eq!(files, vec!["-"]);
        assert_eq!(width, Some(None));
    }

    // A plain file name is not mistaken for a width.
    #[test]
    fn parse_just_file_name() {
        let (files, width) = files_and_width(&["fmt", "some-file"]);

        assert_eq!(files, vec!["some-file"]);
        assert_eq!(width, Some(None));
    }

    // An explicit `-w` wins over the positional `-WIDTH` shorthand.
    #[test]
    fn parse_with_both_widths_positional_first() {
        let (files, width) = files_and_width(&["fmt", "-10", "-w3", "some-file"]);

        assert_eq!(files, vec!["some-file"]);
        assert_eq!(width, Some(Some(3)));
    }
}
530}