uu_ptx/
ptx.rs

1// This file is part of the uutils coreutils package.
2//
3// For the full copyright and license information, please view the LICENSE
4// file that was distributed with this source code.
5
6// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS
7
8use std::cmp;
9use std::cmp::PartialEq;
10use std::collections::{BTreeSet, HashMap, HashSet};
11use std::ffi::{OsStr, OsString};
12use std::fmt::Write as FmtWrite;
13use std::fs::File;
14use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout};
15use std::num::ParseIntError;
16use std::path::Path;
17
18use clap::{Arg, ArgAction, Command};
19use regex::Regex;
20use thiserror::Error;
21use uucore::display::Quotable;
22use uucore::error::{FromIo, UError, UResult, UUsageError};
23use uucore::format_usage;
24use uucore::translate;
25
/// Output format of the generated permuted index.
#[derive(Debug, PartialEq)]
enum OutFormat {
    /// Value used by `Config::default`; `write_traditional_output` refuses to
    /// emit it and returns `PtxError::DumbFormat` instead.
    Dumb,
    /// roff output: one `.MACRO "tail" "before" "keyword+after" "head"` line per keyword.
    Roff,
    /// TeX output: one `\MACRO {tail}{before}{keyword}{after}{head}` line per keyword.
    Tex,
}
32
/// Settings for one `ptx` run, built from the command line by `get_config`.
#[derive(Debug)]
struct Config {
    /// Output format to emit.
    format: OutFormat,
    /// Whether GNU extensions are active; `get_config` clears it for `--traditional`.
    gnu_ext: bool,
    /// `-A`: references are generated as `file:line`.
    auto_ref: bool,
    /// `-r`: the reference is taken from the line itself (first match of `context_regex`).
    input_ref: bool,
    /// `-R`: when unset, `write_traditional_output` deducts the reference width from `line_width`.
    right_ref: bool,
    /// `-f`: keywords are upper-cased before being recorded.
    ignore_case: bool,
    /// `-M`: macro name written at the start of every output line.
    macro_name: String,
    /// `-F`: marker added to chunks that had to be truncated.
    trunc_str: String,
    /// Regex whose first match in a line is treated as that line's reference.
    context_regex: String,
    /// `-w`: width budget of an output line.
    line_width: usize,
    /// `-g`: gap kept between output fields.
    gap_size: usize,
}
47
48impl Default for Config {
49    fn default() -> Self {
50        Self {
51            format: OutFormat::Dumb,
52            gnu_ext: true,
53            auto_ref: false,
54            input_ref: false,
55            right_ref: false,
56            ignore_case: false,
57            macro_name: "xx".to_owned(),
58            trunc_str: "/".to_owned(),
59            context_regex: "\\w+".to_owned(),
60            line_width: 72,
61            gap_size: 3,
62        }
63    }
64}
65
66fn read_word_filter_file(
67    matches: &clap::ArgMatches,
68    option: &str,
69) -> std::io::Result<HashSet<String>> {
70    let filename = matches
71        .get_one::<OsString>(option)
72        .expect("parsing options failed!");
73    let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
74        Box::new(stdin())
75    } else {
76        let file = File::open(Path::new(filename))?;
77        Box::new(file)
78    });
79    let mut words: HashSet<String> = HashSet::new();
80    for word in reader.lines() {
81        words.insert(word?);
82    }
83    Ok(words)
84}
85
86/// reads contents of file as unique set of characters to be used with the break-file option
87fn read_char_filter_file(
88    matches: &clap::ArgMatches,
89    option: &str,
90) -> std::io::Result<HashSet<char>> {
91    let filename = matches
92        .get_one::<OsString>(option)
93        .expect("parsing options failed!");
94    let mut reader: Box<dyn Read> = if filename == "-" {
95        Box::new(stdin())
96    } else {
97        let file = File::open(Path::new(filename))?;
98        Box::new(file)
99    };
100    let mut buffer = String::new();
101    reader.read_to_string(&mut buffer)?;
102    Ok(buffer.chars().collect())
103}
104
/// Keyword selection rules derived from the command line.
#[derive(Debug)]
struct WordFilter {
    /// True when an only-file was given; then only words in `only_set` are kept.
    only_specified: bool,
    /// True when an ignore-file was given; then words in `ignore_set` are dropped.
    ignore_specified: bool,
    /// Words read from the only-file (empty when none was given).
    only_set: HashSet<String>,
    /// Words read from the ignore-file (empty when none was given).
    ignore_set: HashSet<String>,
    /// Regex source whose matches are the candidate keywords.
    word_regex: String,
}
113
114impl WordFilter {
115    #[allow(clippy::cognitive_complexity)]
116    fn new(matches: &clap::ArgMatches, config: &Config) -> UResult<Self> {
117        let (o, oset): (bool, HashSet<String>) = if matches.contains_id(options::ONLY_FILE) {
118            let words =
119                read_word_filter_file(matches, options::ONLY_FILE).map_err_context(String::new)?;
120            (true, words)
121        } else {
122            (false, HashSet::new())
123        };
124        let (i, iset): (bool, HashSet<String>) = if matches.contains_id(options::IGNORE_FILE) {
125            let words = read_word_filter_file(matches, options::IGNORE_FILE)
126                .map_err_context(String::new)?;
127            (true, words)
128        } else {
129            (false, HashSet::new())
130        };
131        let break_set: Option<HashSet<char>> = if matches.contains_id(options::BREAK_FILE)
132            && !matches.contains_id(options::WORD_REGEXP)
133        {
134            let chars =
135                read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?;
136            let mut hs: HashSet<char> = if config.gnu_ext {
137                HashSet::new() // really only chars found in file
138            } else {
139                // GNU off means at least these are considered
140                [' ', '\t', '\n'].iter().copied().collect()
141            };
142            hs.extend(chars);
143            Some(hs)
144        } else {
145            // if -W takes precedence or default
146            None
147        };
148        // Ignore empty string regex from cmd-line-args
149        let arg_reg: Option<String> = if matches.contains_id(options::WORD_REGEXP) {
150            match matches.get_one::<String>(options::WORD_REGEXP) {
151                Some(v) => {
152                    if v.is_empty() {
153                        None
154                    } else {
155                        Some(v.to_owned())
156                    }
157                }
158                None => None,
159            }
160        } else {
161            None
162        };
163        let reg = match arg_reg {
164            Some(arg_reg) => arg_reg,
165            None => {
166                if let Some(break_set) = break_set {
167                    format!(
168                        "[^{}]+",
169                        regex::escape(&break_set.into_iter().collect::<String>())
170                    )
171                } else if config.gnu_ext {
172                    "\\w+".to_owned()
173                } else {
174                    "[^ \t\n]+".to_owned()
175                }
176            }
177        };
178        Ok(Self {
179            only_specified: o,
180            ignore_specified: i,
181            only_set: oset,
182            ignore_set: iset,
183            word_regex: reg,
184        })
185    }
186}
187
/// One keyword occurrence found in the input.
///
/// The derived ordering compares the fields in declaration order, so a
/// `BTreeSet<WordRef>` iterates by keyword first, then by global line number.
#[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
struct WordRef {
    /// The keyword text (upper-cased by `create_word_set` when case is ignored).
    word: String,
    /// Line index counted across all input files (file offset + local index).
    global_line_nr: usize,
    /// Zero-based line index within `filename`.
    local_line_nr: usize,
    /// Byte offset of the keyword's start within its line.
    position: usize,
    /// Byte offset just past the keyword's end within its line.
    position_end: usize,
    /// Name of the input file the keyword was found in.
    filename: OsString,
}
197
/// Errors specific to `ptx`.
#[derive(Debug, Error)]
enum PtxError {
    /// Output was requested while the format is still `OutFormat::Dumb`.
    #[error("{}", translate!("ptx-error-dumb-format"))]
    DumbFormat,

    /// The named feature or option is not implemented.
    #[error("{}", translate!("ptx-error-not-implemented", "feature" => (*.0)))]
    NotImplemented(&'static str),

    /// A numeric option value could not be parsed.
    #[error("{0}")]
    ParseError(ParseIntError),
}
209
// Uses the trait's default methods only; this is what lets `PtxError` be
// converted with `.into()` / `?` in functions returning `UResult`.
impl UError for PtxError {}
211
212fn get_config(matches: &clap::ArgMatches) -> UResult<Config> {
213    let mut config = Config::default();
214    let err_msg = "parsing options failed";
215    if matches.get_flag(options::TRADITIONAL) {
216        config.gnu_ext = false;
217        config.format = OutFormat::Roff;
218        "[^ \t\n]+".clone_into(&mut config.context_regex);
219    } else {
220        return Err(PtxError::NotImplemented("GNU extensions").into());
221    }
222    if matches.contains_id(options::SENTENCE_REGEXP) {
223        return Err(PtxError::NotImplemented("-S").into());
224    }
225    config.auto_ref = matches.get_flag(options::AUTO_REFERENCE);
226    config.input_ref = matches.get_flag(options::REFERENCES);
227    config.right_ref = matches.get_flag(options::RIGHT_SIDE_REFS);
228    config.ignore_case = matches.get_flag(options::IGNORE_CASE);
229    if matches.contains_id(options::MACRO_NAME) {
230        matches
231            .get_one::<String>(options::MACRO_NAME)
232            .expect(err_msg)
233            .clone_into(&mut config.macro_name);
234    }
235    if matches.contains_id(options::FLAG_TRUNCATION) {
236        matches
237            .get_one::<String>(options::FLAG_TRUNCATION)
238            .expect(err_msg)
239            .clone_into(&mut config.trunc_str);
240    }
241    if matches.contains_id(options::WIDTH) {
242        config.line_width = matches
243            .get_one::<String>(options::WIDTH)
244            .expect(err_msg)
245            .parse()
246            .map_err(PtxError::ParseError)?;
247    }
248    if matches.contains_id(options::GAP_SIZE) {
249        config.gap_size = matches
250            .get_one::<String>(options::GAP_SIZE)
251            .expect(err_msg)
252            .parse()
253            .map_err(PtxError::ParseError)?;
254    }
255    if let Some(format) = matches.get_one::<String>(options::FORMAT) {
256        config.format = match format.as_str() {
257            "roff" => OutFormat::Roff,
258            "tex" => OutFormat::Tex,
259            _ => unreachable!("should be caught by clap"),
260        };
261    }
262    if matches.get_flag(options::format::ROFF) {
263        config.format = OutFormat::Roff;
264    }
265    if matches.get_flag(options::format::TEX) {
266        config.format = OutFormat::Tex;
267    }
268    Ok(config)
269}
270
/// The contents of one input file, as stored by `read_input`.
struct FileContent {
    /// The file's lines, without line terminators.
    lines: Vec<String>,
    /// The same lines as character vectors, for constant-time character indexing.
    chars_lines: Vec<Vec<char>>,
    /// Number of lines read from the input files that preceded this one.
    offset: usize,
}
276
/// Maps each input file name to the content read from it.
type FileMap = HashMap<OsString, FileContent>;
278
279fn read_input(input_files: &[OsString]) -> std::io::Result<FileMap> {
280    let mut file_map: FileMap = HashMap::new();
281    let mut offset: usize = 0;
282    for filename in input_files {
283        let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
284            Box::new(stdin())
285        } else {
286            let file = File::open(Path::new(filename))?;
287            Box::new(file)
288        });
289        let lines: Vec<String> = reader.lines().collect::<std::io::Result<Vec<String>>>()?;
290
291        // Indexing UTF-8 string requires walking from the beginning, which can hurts performance badly when the line is long.
292        // Since we will be jumping around the line a lot, we dump the content into a Vec<char>, which can be indexed in constant time.
293        let chars_lines: Vec<Vec<char>> = lines.iter().map(|x| x.chars().collect()).collect();
294        let size = lines.len();
295        file_map.insert(
296            filename.clone(),
297            FileContent {
298                lines,
299                chars_lines,
300                offset,
301            },
302        );
303        offset += size;
304    }
305    Ok(file_map)
306}
307
308/// Go through every lines in the input files and record each match occurrence as a `WordRef`.
309fn create_word_set(config: &Config, filter: &WordFilter, file_map: &FileMap) -> BTreeSet<WordRef> {
310    let reg = Regex::new(&filter.word_regex).unwrap();
311    let ref_reg = Regex::new(&config.context_regex).unwrap();
312    let mut word_set: BTreeSet<WordRef> = BTreeSet::new();
313    for (file, lines) in file_map {
314        let mut count: usize = 0;
315        let offs = lines.offset;
316        for line in &lines.lines {
317            // if -r, exclude reference from word set
318            let (ref_beg, ref_end) = match ref_reg.find(line) {
319                Some(x) => (x.start(), x.end()),
320                None => (0, 0),
321            };
322            // match words with given regex
323            for mat in reg.find_iter(line) {
324                let (beg, end) = (mat.start(), mat.end());
325                if config.input_ref && ((beg, end) == (ref_beg, ref_end)) {
326                    continue;
327                }
328                let mut word = line[beg..end].to_owned();
329                if filter.only_specified && !filter.only_set.contains(&word) {
330                    continue;
331                }
332                if filter.ignore_specified && filter.ignore_set.contains(&word) {
333                    continue;
334                }
335                if config.ignore_case {
336                    word = word.to_uppercase();
337                }
338                word_set.insert(WordRef {
339                    word,
340                    filename: file.clone(),
341                    global_line_nr: offs + count,
342                    local_line_nr: count,
343                    position: beg,
344                    position_end: end,
345                });
346            }
347            count += 1;
348        }
349    }
350    word_set
351}
352
353fn get_reference(config: &Config, word_ref: &WordRef, line: &str, context_reg: &Regex) -> String {
354    if config.auto_ref {
355        if word_ref.filename == "-" {
356            format!(":{}", word_ref.local_line_nr + 1)
357        } else {
358            format!(
359                "{}:{}",
360                word_ref.filename.maybe_quote(),
361                word_ref.local_line_nr + 1
362            )
363        }
364    } else if config.input_ref {
365        let (beg, end) = match context_reg.find(line) {
366            Some(x) => (x.start(), x.end()),
367            None => (0, 0),
368        };
369        line[beg..end].to_string()
370    } else {
371        String::new()
372    }
373}
374
/// Checks that `beg..end` is a valid range into `s`.
///
/// # Panics
///
/// Panics if `beg > end` or `end > s.len()`.
fn assert_str_integrity(s: &[char], beg: usize, end: usize) {
    assert!(beg <= end);
    assert!(end <= s.len());
}
379
380fn trim_broken_word_left(s: &[char], beg: usize, end: usize) -> usize {
381    assert_str_integrity(s, beg, end);
382    if beg == end || beg == 0 || s[beg].is_whitespace() || s[beg - 1].is_whitespace() {
383        return beg;
384    }
385    let mut b = beg;
386    while b < end && !s[b].is_whitespace() {
387        b += 1;
388    }
389    b
390}
391
392fn trim_broken_word_right(s: &[char], beg: usize, end: usize) -> usize {
393    assert_str_integrity(s, beg, end);
394    if beg == end || end == s.len() || s[end - 1].is_whitespace() || s[end].is_whitespace() {
395        return end;
396    }
397    let mut e = end;
398    while beg < e && !s[e - 1].is_whitespace() {
399        e -= 1;
400    }
401    e
402}
403
404fn trim_idx(s: &[char], beg: usize, end: usize) -> (usize, usize) {
405    assert_str_integrity(s, beg, end);
406    let mut b = beg;
407    let mut e = end;
408    while b < e && s[b].is_whitespace() {
409        b += 1;
410    }
411    while beg < e && s[e - 1].is_whitespace() {
412        e -= 1;
413    }
414    (b, e)
415}
416
417fn get_output_chunks(
418    all_before: &[char],
419    keyword: &str,
420    all_after: &[char],
421    config: &Config,
422) -> (String, String, String, String) {
423    // Chunk size logics are mostly copied from the GNU ptx source.
424    // https://github.com/MaiZure/coreutils-8.3/blob/master/src/ptx.c#L1234
425    let half_line_size = config.line_width / 2;
426    let max_before_size = cmp::max(half_line_size as isize - config.gap_size as isize, 0) as usize;
427    let max_after_size = cmp::max(
428        half_line_size as isize
429            - (2 * config.trunc_str.len()) as isize
430            - keyword.len() as isize
431            - 1,
432        0,
433    ) as usize;
434
435    // Allocate plenty space for all the chunks.
436    let mut head = String::with_capacity(half_line_size);
437    let mut before = String::with_capacity(half_line_size);
438    let mut after = String::with_capacity(half_line_size);
439    let mut tail = String::with_capacity(half_line_size);
440
441    // the before chunk
442
443    // trim whitespace away from all_before to get the index where the before chunk should end.
444    let (_, before_end) = trim_idx(all_before, 0, all_before.len());
445
446    // the minimum possible begin index of the before_chunk is the end index minus the length.
447    let before_beg = cmp::max(before_end as isize - max_before_size as isize, 0) as usize;
448    // in case that falls in the middle of a word, trim away the word.
449    let before_beg = trim_broken_word_left(all_before, before_beg, before_end);
450
451    // trim away white space.
452    let (before_beg, before_end) = trim_idx(all_before, before_beg, before_end);
453
454    // and get the string.
455    let before_str: String = all_before[before_beg..before_end].iter().collect();
456    before.push_str(&before_str);
457    assert!(max_before_size >= before.len());
458
459    // the after chunk
460
461    // must be no longer than the minimum between the max size and the total available string.
462    let after_end = cmp::min(max_after_size, all_after.len());
463    // in case that falls in the middle of a word, trim away the word.
464    let after_end = trim_broken_word_right(all_after, 0, after_end);
465
466    // trim away white space.
467    let (_, after_end) = trim_idx(all_after, 0, after_end);
468
469    // and get the string
470    let after_str: String = all_after[0..after_end].iter().collect();
471    after.push_str(&after_str);
472    assert!(max_after_size >= after.len());
473
474    // the tail chunk
475
476    // max size of the tail chunk = max size of left half - space taken by before chunk - gap size.
477    let max_tail_size = cmp::max(
478        max_before_size as isize - before.len() as isize - config.gap_size as isize,
479        0,
480    ) as usize;
481
482    // the tail chunk takes text starting from where the after chunk ends (with whitespace trimmed).
483    let (tail_beg, _) = trim_idx(all_after, after_end, all_after.len());
484
485    // end = begin + max length
486    let tail_end = cmp::min(all_after.len(), tail_beg + max_tail_size);
487    // in case that falls in the middle of a word, trim away the word.
488    let tail_end = trim_broken_word_right(all_after, tail_beg, tail_end);
489
490    // trim away whitespace again.
491    let (tail_beg, mut tail_end) = trim_idx(all_after, tail_beg, tail_end);
492    // Fix: Manually trim trailing char (like "a") that are preceded by a space.
493    // This handles cases like "is a" which are not correctly trimmed by the
494    // preceding functions.
495    if tail_end >= 2
496        && (tail_end - 2) > tail_beg
497        && all_after[tail_end - 2].is_whitespace()
498        && !all_after[tail_end - 1].is_whitespace()
499    {
500        tail_end -= 1;
501        (_, tail_end) = trim_idx(all_after, tail_beg, tail_end);
502    }
503
504    // and get the string
505    let tail_str: String = all_after[tail_beg..tail_end].iter().collect();
506    tail.push_str(&tail_str);
507
508    // the head chunk
509
510    // max size of the head chunk = max size of right half - space taken by after chunk - gap size.
511    let max_head_size = cmp::max(
512        max_after_size as isize - after.len() as isize - config.gap_size as isize,
513        0,
514    ) as usize;
515
516    // the head chunk takes text from before the before chunk
517    let (_, head_end) = trim_idx(all_before, 0, before_beg);
518
519    // begin = end - max length
520    let head_beg = cmp::max(head_end as isize - max_head_size as isize, 0) as usize;
521    // in case that falls in the middle of a word, trim away the word.
522    let head_beg = trim_broken_word_left(all_before, head_beg, head_end);
523
524    // trim away white space again.
525    let (head_beg, head_end) = trim_idx(all_before, head_beg, head_end);
526
527    // and get the string.
528    let head_str: String = all_before[head_beg..head_end].iter().collect();
529    head.push_str(&head_str);
530    //The TeX mode does not output truncation characters.
531    if config.format != OutFormat::Tex {
532        // put right context truncation string if needed
533        if after_end != all_after.len() && tail_beg == tail_end {
534            after.push_str(&config.trunc_str);
535        } else if after_end != all_after.len() && tail_end != all_after.len() {
536            tail.push_str(&config.trunc_str);
537        }
538
539        // put left context truncation string if needed
540        if before_beg != 0 && head_beg == head_end {
541            before = format!("{}{before}", config.trunc_str);
542        } else if before_beg != 0 && head_beg != 0 {
543            head = format!("{}{head}", config.trunc_str);
544        }
545    }
546
547    (tail, before, after, head)
548}
549
/// Maps one character to its TeX-safe representation.
///
/// A backslash becomes `\backslash{}`; `$ % # & _` get a leading backslash;
/// braces are emitted as `$\{$` / `$\}$`; everything else is passed through.
fn tex_mapper(x: char) -> String {
    if x == '\\' {
        return String::from("\\backslash{}");
    }
    let mut mapped = String::new();
    match x {
        '$' | '%' | '#' | '&' | '_' => {
            mapped.push('\\');
            mapped.push(x);
        }
        '{' | '}' => {
            mapped.push_str("$\\");
            mapped.push(x);
            mapped.push('$');
        }
        _ => mapped.push(x),
    }
    mapped
}
558
559/// Escape special characters for TeX.
560fn format_tex_field(s: &str) -> String {
561    let mapped_chunks: Vec<String> = s.chars().map(tex_mapper).collect();
562    mapped_chunks.join("")
563}
564
565fn format_tex_line(
566    config: &Config,
567    word_ref: &WordRef,
568    line: &str,
569    chars_line: &[char],
570    reference: &str,
571) -> String {
572    let mut output = String::new();
573    write!(output, "\\{} ", config.macro_name).unwrap();
574    let (tail, before, keyword, after, head) =
575        prepare_line_chunks(config, word_ref, line, chars_line, reference);
576    write!(
577        output,
578        "{{{0}}}{{{1}}}{{{2}}}{{{3}}}{{{4}}}",
579        format_tex_field(&tail),
580        format_tex_field(&before),
581        format_tex_field(&keyword),
582        format_tex_field(&after),
583        format_tex_field(&head),
584    )
585    .unwrap();
586    if config.auto_ref || config.input_ref {
587        write!(output, "{{{}}}", format_tex_field(reference)).unwrap();
588    }
589    output
590}
591
/// Escapes `s` for use inside a double-quoted roff macro argument by doubling
/// every double quote.
fn format_roff_field(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '"' {
            escaped.push_str("\"\"");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
595
596fn format_roff_line(
597    config: &Config,
598    word_ref: &WordRef,
599    line: &str,
600    chars_line: &[char],
601    reference: &str,
602) -> String {
603    let mut output = String::new();
604    write!(output, ".{}", config.macro_name).unwrap();
605    let (tail, before, keyword, after, head) =
606        prepare_line_chunks(config, word_ref, line, chars_line, reference);
607    write!(
608        output,
609        " \"{}\" \"{}\" \"{}{}\" \"{}\"",
610        format_roff_field(&tail),
611        format_roff_field(&before),
612        format_roff_field(&keyword),
613        format_roff_field(&after),
614        format_roff_field(&head)
615    )
616    .unwrap();
617    if config.auto_ref || config.input_ref {
618        write!(output, " \"{}\"", format_roff_field(reference)).unwrap();
619    }
620    output
621}
622
623/// Extract and prepare text chunks for formatting in both TeX and roff output
624fn prepare_line_chunks(
625    config: &Config,
626    word_ref: &WordRef,
627    line: &str,
628    chars_line: &[char],
629    reference: &str,
630) -> (String, String, String, String, String) {
631    // Convert byte positions to character positions
632    let ref_char_position = line[..word_ref.position].chars().count();
633    let char_position_end = ref_char_position
634        + line[word_ref.position..word_ref.position_end]
635            .chars()
636            .count();
637
638    // Extract the text before the keyword
639    let all_before = if config.input_ref {
640        let before = &line[..word_ref.position];
641        let before_char_count = before.chars().count();
642        let trimmed_char_count = before
643            .trim_start_matches(reference)
644            .trim_start()
645            .chars()
646            .count();
647        let trim_offset = before_char_count - trimmed_char_count;
648        &chars_line[trim_offset..before_char_count]
649    } else {
650        &chars_line[..ref_char_position]
651    };
652
653    // Extract the keyword and text after it
654    let keyword = line[word_ref.position..word_ref.position_end].to_string();
655    let all_after = &chars_line[char_position_end..];
656
657    // Get formatted output chunks
658    let (tail, before, after, head) = get_output_chunks(all_before, &keyword, all_after, config);
659
660    (tail, before, keyword, after, head)
661}
662
663fn write_traditional_output(
664    config: &mut Config,
665    file_map: &FileMap,
666    words: &BTreeSet<WordRef>,
667    output_filename: &OsStr,
668) -> UResult<()> {
669    let mut writer: BufWriter<Box<dyn Write>> =
670        BufWriter::new(if output_filename == OsStr::new("-") {
671            Box::new(stdout())
672        } else {
673            let file = File::create(output_filename)
674                .map_err_context(|| output_filename.to_string_lossy().quote().to_string())?;
675            Box::new(file)
676        });
677
678    let context_reg = Regex::new(&config.context_regex).unwrap();
679
680    if !config.right_ref {
681        let max_ref_len = if config.auto_ref {
682            get_auto_max_reference_len(words)
683        } else {
684            0
685        };
686        config.line_width -= max_ref_len;
687    }
688
689    for word_ref in words {
690        let file_map_value: &FileContent = file_map
691            .get(&word_ref.filename)
692            .expect("Missing file in file map");
693        let FileContent {
694            ref lines,
695            ref chars_lines,
696            offset: _,
697        } = *(file_map_value);
698        let reference = get_reference(
699            config,
700            word_ref,
701            &lines[word_ref.local_line_nr],
702            &context_reg,
703        );
704        let output_line: String = match config.format {
705            OutFormat::Tex => format_tex_line(
706                config,
707                word_ref,
708                &lines[word_ref.local_line_nr],
709                &chars_lines[word_ref.local_line_nr],
710                &reference,
711            ),
712            OutFormat::Roff => format_roff_line(
713                config,
714                word_ref,
715                &lines[word_ref.local_line_nr],
716                &chars_lines[word_ref.local_line_nr],
717                &reference,
718            ),
719            OutFormat::Dumb => {
720                return Err(PtxError::DumbFormat.into());
721            }
722        };
723        writeln!(writer, "{output_line}")
724            .map_err_context(|| translate!("ptx-error-write-failed"))?;
725    }
726
727    writer
728        .flush()
729        .map_err_context(|| translate!("ptx-error-write-failed"))?;
730
731    Ok(())
732}
733
734fn get_auto_max_reference_len(words: &BTreeSet<WordRef>) -> usize {
735    //Get the maximum length of the reference field
736    let line_num = words
737        .iter()
738        .map(|w| {
739            if w.local_line_nr == 0 {
740                1
741            } else {
742                (w.local_line_nr as f64).log10() as usize + 1
743            }
744        })
745        .max()
746        .unwrap_or(0);
747
748    let filename_len = words
749        .iter()
750        .filter(|w| w.filename != "-")
751        .map(|w| w.filename.maybe_quote().to_string().len())
752        .max()
753        .unwrap_or(0);
754
755    // +1 for the colon
756    line_num + filename_len + 1
757}
758
/// Argument identifiers shared by `uu_app` (definition) and the code that
/// queries the parsed matches. For options with a long form, the identifier
/// doubles as the long option name.
mod options {
    /// Identifiers of the short-only output format flags (`-O`, `-T`).
    pub mod format {
        pub static ROFF: &str = "roff";
        pub static TEX: &str = "tex";
    }

    // Positional file operands.
    pub static FILE: &str = "file";
    pub static AUTO_REFERENCE: &str = "auto-reference";
    pub static TRADITIONAL: &str = "traditional";
    pub static FLAG_TRUNCATION: &str = "flag-truncation";
    pub static MACRO_NAME: &str = "macro-name";
    // Hidden `--format` option accepting "roff" or "tex".
    pub static FORMAT: &str = "format";
    pub static RIGHT_SIDE_REFS: &str = "right-side-refs";
    pub static SENTENCE_REGEXP: &str = "sentence-regexp";
    pub static WORD_REGEXP: &str = "word-regexp";
    pub static BREAK_FILE: &str = "break-file";
    pub static IGNORE_CASE: &str = "ignore-case";
    pub static GAP_SIZE: &str = "gap-size";
    pub static IGNORE_FILE: &str = "ignore-file";
    pub static ONLY_FILE: &str = "only-file";
    pub static REFERENCES: &str = "references";
    pub static WIDTH: &str = "width";
}
782
783#[uucore::main]
784pub fn uumain(args: impl uucore::Args) -> UResult<()> {
785    let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;
786    let mut config = get_config(&matches)?;
787
788    let input_files;
789    let output_file: OsString;
790
791    let mut files = matches
792        .get_many::<OsString>(options::FILE)
793        .into_iter()
794        .flatten()
795        .cloned();
796
797    if config.gnu_ext {
798        input_files = {
799            let mut files = files.collect::<Vec<_>>();
800            if files.is_empty() {
801                files.push(OsString::from("-"));
802            }
803            files
804        };
805        output_file = OsString::from("-");
806    } else {
807        input_files = vec![files.next().unwrap_or(OsString::from("-"))];
808        output_file = files.next().unwrap_or(OsString::from("-"));
809        if let Some(file) = files.next() {
810            return Err(UUsageError::new(
811                1,
812                translate!("ptx-error-extra-operand", "operand" => file.to_string_lossy().quote()),
813            ));
814        }
815    }
816
817    let word_filter = WordFilter::new(&matches, &config)?;
818    let file_map = read_input(&input_files).map_err_context(String::new)?;
819    let word_set = create_word_set(&config, &word_filter, &file_map);
820    write_traditional_output(&mut config, &file_map, &word_set, &output_file)
821}
822
/// Builds the `clap` command describing the `ptx` command line.
///
/// Help texts are looked up through `translate!`. Long options may be
/// abbreviated (`infer_long_args`). The three format selectors (`--format`,
/// `-O`, `-T`) override one another, so the last one given wins.
pub fn uu_app() -> Command {
    Command::new(uucore::util_name())
        .about(translate!("ptx-about"))
        .version(uucore::crate_version!())
        .help_template(uucore::localized_help_template(uucore::util_name()))
        .override_usage(format_usage(&translate!("ptx-usage")))
        .infer_long_args(true)
        // Positional operands (input/output files); hidden from the help text.
        .arg(
            Arg::new(options::FILE)
                .hide(true)
                .action(ArgAction::Append)
                .value_hint(clap::ValueHint::FilePath)
                .value_parser(clap::value_parser!(OsString)),
        )
        .arg(
            Arg::new(options::AUTO_REFERENCE)
                .short('A')
                .long(options::AUTO_REFERENCE)
                .help(translate!("ptx-help-auto-reference"))
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new(options::TRADITIONAL)
                .short('G')
                .long(options::TRADITIONAL)
                .help(translate!("ptx-help-traditional"))
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new(options::FLAG_TRUNCATION)
                .short('F')
                .long(options::FLAG_TRUNCATION)
                .help(translate!("ptx-help-flag-truncation"))
                .value_name("STRING"),
        )
        .arg(
            Arg::new(options::MACRO_NAME)
                .short('M')
                .long(options::MACRO_NAME)
                .help(translate!("ptx-help-macro-name"))
                .value_name("STRING"),
        )
        // Hidden long-only format selector; values are restricted to "roff" and "tex".
        .arg(
            Arg::new(options::FORMAT)
                .long(options::FORMAT)
                .hide(true)
                .value_parser(["roff", "tex"])
                .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]),
        )
        .arg(
            Arg::new(options::format::ROFF)
                .short('O')
                .help(translate!("ptx-help-roff"))
                .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new(options::format::TEX)
                .short('T')
                .help(translate!("ptx-help-tex"))
                .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new(options::RIGHT_SIDE_REFS)
                .short('R')
                .long(options::RIGHT_SIDE_REFS)
                .help(translate!("ptx-help-right-side-refs"))
                .action(ArgAction::SetTrue),
        )
        // Accepted by the parser, but `get_config` rejects it as not implemented.
        .arg(
            Arg::new(options::SENTENCE_REGEXP)
                .short('S')
                .long(options::SENTENCE_REGEXP)
                .help(translate!("ptx-help-sentence-regexp"))
                .value_name("REGEXP"),
        )
        .arg(
            Arg::new(options::WORD_REGEXP)
                .short('W')
                .long(options::WORD_REGEXP)
                .help(translate!("ptx-help-word-regexp"))
                .value_name("REGEXP"),
        )
        .arg(
            Arg::new(options::BREAK_FILE)
                .short('b')
                .long(options::BREAK_FILE)
                .help(translate!("ptx-help-break-file"))
                .value_name("FILE")
                .value_hint(clap::ValueHint::FilePath)
                .value_parser(clap::value_parser!(OsString)),
        )
        .arg(
            Arg::new(options::IGNORE_CASE)
                .short('f')
                .long(options::IGNORE_CASE)
                .help(translate!("ptx-help-ignore-case"))
                .action(ArgAction::SetTrue),
        )
        // Parsed as a string here; `get_config` converts it to a number.
        .arg(
            Arg::new(options::GAP_SIZE)
                .short('g')
                .long(options::GAP_SIZE)
                .help(translate!("ptx-help-gap-size"))
                .value_name("NUMBER"),
        )
        .arg(
            Arg::new(options::IGNORE_FILE)
                .short('i')
                .long(options::IGNORE_FILE)
                .help(translate!("ptx-help-ignore-file"))
                .value_name("FILE")
                .value_hint(clap::ValueHint::FilePath)
                .value_parser(clap::value_parser!(OsString)),
        )
        .arg(
            Arg::new(options::ONLY_FILE)
                .short('o')
                .long(options::ONLY_FILE)
                .help(translate!("ptx-help-only-file"))
                .value_name("FILE")
                .value_hint(clap::ValueHint::FilePath)
                .value_parser(clap::value_parser!(OsString)),
        )
        // NOTE(review): this is a boolean flag (`SetTrue`), so `value_name("FILE")`
        // looks like a copy-paste leftover — confirm and drop if so.
        .arg(
            Arg::new(options::REFERENCES)
                .short('r')
                .long(options::REFERENCES)
                .help(translate!("ptx-help-references"))
                .value_name("FILE")
                .action(ArgAction::SetTrue),
        )
        // Parsed as a string here; `get_config` converts it to a number.
        .arg(
            Arg::new(options::WIDTH)
                .short('w')
                .long(options::WIDTH)
                .help(translate!("ptx-help-width"))
                .value_name("NUMBER"),
        )
}
uu_ptx/ptx.rs

uu_ptx/
ptx.rs