uu_ptx/
ptx.rs

1// This file is part of the uutils coreutils package.
2//
3// For the full copyright and license information, please view the LICENSE
4// file that was distributed with this source code.
5
6// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS
7
8use std::cmp;
9use std::cmp::PartialEq;
10use std::collections::{BTreeSet, HashMap, HashSet};
11use std::ffi::{OsStr, OsString};
12use std::fmt::Write as FmtWrite;
13use std::fs::File;
14use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout};
15use std::num::ParseIntError;
16use std::path::Path;
17
18use clap::{Arg, ArgAction, Command};
19use regex::Regex;
20use thiserror::Error;
21use uucore::display::Quotable;
22use uucore::error::{FromIo, UError, UResult, UUsageError};
23use uucore::format_usage;
24use uucore::translate;
25
/// Output format selected for the generated permuted index.
///
/// The type is a plain tag, so it is `Copy` and fully comparable (`Eq`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OutFormat {
    /// Plain column-aligned text; the default unless traditional mode or an
    /// explicit format option selects something else.
    Dumb,
    /// One roff macro call per keyword occurrence.
    Roff,
    /// One TeX macro call per keyword occurrence.
    Tex,
}
32
33#[derive(Debug)]
34struct Config {
35    format: OutFormat,
36    gnu_ext: bool,
37    auto_ref: bool,
38    input_ref: bool,
39    right_ref: bool,
40    ignore_case: bool,
41    macro_name: String,
42    trunc_str: String,
43    context_regex: String,
44    line_width: usize,
45    gap_size: usize,
46}
47
48impl Default for Config {
49    fn default() -> Self {
50        Self {
51            format: OutFormat::Dumb,
52            gnu_ext: true,
53            auto_ref: false,
54            input_ref: false,
55            right_ref: false,
56            ignore_case: false,
57            macro_name: "xx".to_owned(),
58            trunc_str: "/".to_owned(),
59            context_regex: "\\w+".to_owned(),
60            line_width: 72,
61            gap_size: 3,
62        }
63    }
64}
65
66fn read_word_filter_file(
67    matches: &clap::ArgMatches,
68    option: &str,
69) -> std::io::Result<HashSet<String>> {
70    let filename = matches
71        .get_one::<OsString>(option)
72        .expect("parsing options failed!");
73    let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
74        Box::new(stdin())
75    } else {
76        let file = File::open(Path::new(filename))?;
77        Box::new(file)
78    });
79    let mut words: HashSet<String> = HashSet::new();
80    for word in reader.lines() {
81        words.insert(word?);
82    }
83    Ok(words)
84}
85
86/// reads contents of file as unique set of characters to be used with the break-file option
87fn read_char_filter_file(
88    matches: &clap::ArgMatches,
89    option: &str,
90) -> std::io::Result<HashSet<char>> {
91    let filename = matches
92        .get_one::<OsString>(option)
93        .expect("parsing options failed!");
94    let mut reader: Box<dyn Read> = if filename == "-" {
95        Box::new(stdin())
96    } else {
97        let file = File::open(Path::new(filename))?;
98        Box::new(file)
99    };
100    let mut buffer = String::new();
101    reader.read_to_string(&mut buffer)?;
102    Ok(buffer.chars().collect())
103}
104
/// Decides which matches of the word regex become index keywords.
#[derive(Debug)]
struct WordFilter {
    /// When `true`, only words contained in `only_set` are kept.
    only_specified: bool,
    /// When `true`, words contained in `ignore_set` are dropped.
    ignore_specified: bool,
    /// Words loaded from the only-file (empty when none was given).
    only_set: HashSet<String>,
    /// Words loaded from the ignore-file (empty when none was given).
    ignore_set: HashSet<String>,
    /// Regex source used to find candidate keywords on each input line.
    word_regex: String,
}
113
114impl WordFilter {
115    #[allow(clippy::cognitive_complexity)]
116    fn new(matches: &clap::ArgMatches, config: &Config) -> UResult<Self> {
117        let (o, oset): (bool, HashSet<String>) = if matches.contains_id(options::ONLY_FILE) {
118            let words =
119                read_word_filter_file(matches, options::ONLY_FILE).map_err_context(String::new)?;
120            (true, words)
121        } else {
122            (false, HashSet::new())
123        };
124        let (i, iset): (bool, HashSet<String>) = if matches.contains_id(options::IGNORE_FILE) {
125            let words = read_word_filter_file(matches, options::IGNORE_FILE)
126                .map_err_context(String::new)?;
127            (true, words)
128        } else {
129            (false, HashSet::new())
130        };
131        let break_set: Option<HashSet<char>> = if matches.contains_id(options::BREAK_FILE)
132            && !matches.contains_id(options::WORD_REGEXP)
133        {
134            let chars =
135                read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?;
136            let mut hs: HashSet<char> = if config.gnu_ext {
137                HashSet::new() // really only chars found in file
138            } else {
139                // GNU off means at least these are considered
140                [' ', '\t', '\n'].iter().copied().collect()
141            };
142            hs.extend(chars);
143            Some(hs)
144        } else {
145            // if -W takes precedence or default
146            None
147        };
148        // Ignore empty string regex from cmd-line-args
149        let arg_reg: Option<String> = if matches.contains_id(options::WORD_REGEXP) {
150            match matches.get_one::<String>(options::WORD_REGEXP) {
151                Some(v) => {
152                    if v.is_empty() {
153                        None
154                    } else {
155                        Some(v.to_owned())
156                    }
157                }
158                None => None,
159            }
160        } else {
161            None
162        };
163        let reg = match arg_reg {
164            Some(arg_reg) => arg_reg,
165            None => {
166                if let Some(break_set) = break_set {
167                    format!(
168                        "[^{}]+",
169                        regex::escape(&break_set.into_iter().collect::<String>())
170                    )
171                } else if config.gnu_ext {
172                    "\\w+".to_owned()
173                } else {
174                    "[^ \t\n]+".to_owned()
175                }
176            }
177        };
178        Ok(Self {
179            only_specified: o,
180            ignore_specified: i,
181            only_set: oset,
182            ignore_set: iset,
183            word_regex: reg,
184        })
185    }
186}
187
/// One keyword occurrence found in the input.
///
/// The derived ordering compares fields in declaration order, so entries sort
/// by `word` first and then by `global_line_nr`; do not reorder the fields.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
struct WordRef {
    /// The keyword text (upper-cased when case is ignored).
    word: String,
    /// Line index counted across all input files.
    global_line_nr: usize,
    /// Zero-based line index within `filename`.
    local_line_nr: usize,
    /// Byte offset in the line where the keyword starts.
    position: usize,
    /// Byte offset in the line just past the keyword.
    position_end: usize,
    /// Input file the occurrence came from.
    filename: OsString,
}
197
198#[derive(Debug, Error)]
199enum PtxError {
200    #[error("{}", translate!("ptx-error-not-implemented", "feature" => (*.0)))]
201    NotImplemented(&'static str),
202
203    #[error("{0}")]
204    ParseError(ParseIntError),
205}
206
207impl UError for PtxError {}
208
209fn get_config(matches: &clap::ArgMatches) -> UResult<Config> {
210    let mut config = Config::default();
211    let err_msg = "parsing options failed";
212    if matches.get_flag(options::TRADITIONAL) {
213        config.gnu_ext = false;
214        config.format = OutFormat::Roff;
215        "[^ \t\n]+".clone_into(&mut config.context_regex);
216    }
217    if matches.contains_id(options::SENTENCE_REGEXP) {
218        return Err(PtxError::NotImplemented("-S").into());
219    }
220    config.auto_ref = matches.get_flag(options::AUTO_REFERENCE);
221    config.input_ref = matches.get_flag(options::REFERENCES);
222    config.right_ref = matches.get_flag(options::RIGHT_SIDE_REFS);
223    config.ignore_case = matches.get_flag(options::IGNORE_CASE);
224    if matches.contains_id(options::MACRO_NAME) {
225        matches
226            .get_one::<String>(options::MACRO_NAME)
227            .expect(err_msg)
228            .clone_into(&mut config.macro_name);
229    }
230    if matches.contains_id(options::FLAG_TRUNCATION) {
231        matches
232            .get_one::<String>(options::FLAG_TRUNCATION)
233            .expect(err_msg)
234            .clone_into(&mut config.trunc_str);
235    }
236    if matches.contains_id(options::WIDTH) {
237        config.line_width = matches
238            .get_one::<String>(options::WIDTH)
239            .expect(err_msg)
240            .parse()
241            .map_err(PtxError::ParseError)?;
242    }
243    if matches.contains_id(options::GAP_SIZE) {
244        config.gap_size = matches
245            .get_one::<String>(options::GAP_SIZE)
246            .expect(err_msg)
247            .parse()
248            .map_err(PtxError::ParseError)?;
249    }
250    if let Some(format) = matches.get_one::<String>(options::FORMAT) {
251        config.format = match format.as_str() {
252            "roff" => OutFormat::Roff,
253            "tex" => OutFormat::Tex,
254            _ => unreachable!("should be caught by clap"),
255        };
256    }
257    if matches.get_flag(options::format::ROFF) {
258        config.format = OutFormat::Roff;
259    }
260    if matches.get_flag(options::format::TEX) {
261        config.format = OutFormat::Tex;
262    }
263    Ok(config)
264}
265
/// The contents of one input file, kept in two parallel representations.
struct FileContent {
    /// The file's lines as strings (used for regex matching and byte slicing).
    lines: Vec<String>,
    /// The same lines as character vectors, for constant-time char indexing.
    chars_lines: Vec<Vec<char>>,
    /// Number of lines read from the input files that precede this one.
    offset: usize,
}

/// Input file name mapped to that file's contents.
type FileMap = HashMap<OsString, FileContent>;
273
274fn read_input(input_files: &[OsString]) -> std::io::Result<FileMap> {
275    let mut file_map: FileMap = HashMap::new();
276    let mut offset: usize = 0;
277    for filename in input_files {
278        let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
279            Box::new(stdin())
280        } else {
281            let file = File::open(Path::new(filename))?;
282            Box::new(file)
283        });
284        let lines: Vec<String> = reader.lines().collect::<std::io::Result<Vec<String>>>()?;
285
286        // Indexing UTF-8 string requires walking from the beginning, which can hurts performance badly when the line is long.
287        // Since we will be jumping around the line a lot, we dump the content into a Vec<char>, which can be indexed in constant time.
288        let chars_lines: Vec<Vec<char>> = lines.iter().map(|x| x.chars().collect()).collect();
289        let size = lines.len();
290        file_map.insert(
291            filename.clone(),
292            FileContent {
293                lines,
294                chars_lines,
295                offset,
296            },
297        );
298        offset += size;
299    }
300    Ok(file_map)
301}
302
303/// Go through every lines in the input files and record each match occurrence as a `WordRef`.
304fn create_word_set(config: &Config, filter: &WordFilter, file_map: &FileMap) -> BTreeSet<WordRef> {
305    let reg = Regex::new(&filter.word_regex).unwrap();
306    let ref_reg = Regex::new(&config.context_regex).unwrap();
307    let mut word_set: BTreeSet<WordRef> = BTreeSet::new();
308    for (file, lines) in file_map {
309        let mut count: usize = 0;
310        let offs = lines.offset;
311        for line in &lines.lines {
312            // if -r, exclude reference from word set
313            let (ref_beg, ref_end) = match ref_reg.find(line) {
314                Some(x) => (x.start(), x.end()),
315                None => (0, 0),
316            };
317            // match words with given regex
318            for mat in reg.find_iter(line) {
319                let (beg, end) = (mat.start(), mat.end());
320                if config.input_ref && ((beg, end) == (ref_beg, ref_end)) {
321                    continue;
322                }
323                let mut word = line[beg..end].to_owned();
324                if filter.only_specified && !filter.only_set.contains(&word) {
325                    continue;
326                }
327                if filter.ignore_specified && filter.ignore_set.contains(&word) {
328                    continue;
329                }
330                if config.ignore_case {
331                    word = word.to_uppercase();
332                }
333                word_set.insert(WordRef {
334                    word,
335                    filename: file.clone(),
336                    global_line_nr: offs + count,
337                    local_line_nr: count,
338                    position: beg,
339                    position_end: end,
340                });
341            }
342            count += 1;
343        }
344    }
345    word_set
346}
347
348fn get_reference(config: &Config, word_ref: &WordRef, line: &str, context_reg: &Regex) -> String {
349    if config.auto_ref {
350        if word_ref.filename == "-" {
351            format!(":{}", word_ref.local_line_nr + 1)
352        } else {
353            format!(
354                "{}:{}",
355                word_ref.filename.maybe_quote(),
356                word_ref.local_line_nr + 1
357            )
358        }
359    } else if config.input_ref {
360        let (beg, end) = match context_reg.find(line) {
361            Some(x) => (x.start(), x.end()),
362            None => (0, 0),
363        };
364        line[beg..end].to_string()
365    } else {
366        String::new()
367    }
368}
369
/// Checks that `beg..end` is a well-formed range inside `s`.
///
/// # Panics
///
/// Panics when `beg > end` or when `end` lies past the end of `s`.
fn assert_str_integrity(s: &[char], beg: usize, end: usize) {
    // The range must not be inverted ...
    assert!(beg <= end);
    // ... and must not run past the slice.
    assert!(end <= s.len());
}
374
375fn trim_broken_word_left(s: &[char], beg: usize, end: usize) -> usize {
376    assert_str_integrity(s, beg, end);
377    if beg == end || beg == 0 || s[beg].is_whitespace() || s[beg - 1].is_whitespace() {
378        return beg;
379    }
380    let mut b = beg;
381    while b < end && !s[b].is_whitespace() {
382        b += 1;
383    }
384    b
385}
386
387fn trim_broken_word_right(s: &[char], beg: usize, end: usize) -> usize {
388    assert_str_integrity(s, beg, end);
389    if beg == end || end == s.len() || s[end - 1].is_whitespace() || s[end].is_whitespace() {
390        return end;
391    }
392    let mut e = end;
393    while beg < e && !s[e - 1].is_whitespace() {
394        e -= 1;
395    }
396    e
397}
398
399fn trim_idx(s: &[char], beg: usize, end: usize) -> (usize, usize) {
400    assert_str_integrity(s, beg, end);
401    let mut b = beg;
402    let mut e = end;
403    while b < e && s[b].is_whitespace() {
404        b += 1;
405    }
406    while beg < e && s[e - 1].is_whitespace() {
407        e -= 1;
408    }
409    (b, e)
410}
411
412fn get_output_chunks(
413    all_before: &[char],
414    keyword: &str,
415    all_after: &[char],
416    config: &Config,
417) -> (String, String, String, String) {
418    // Chunk size logics are mostly copied from the GNU ptx source.
419    // https://github.com/MaiZure/coreutils-8.3/blob/master/src/ptx.c#L1234
420    let half_line_size = config.line_width / 2;
421    let max_before_size = cmp::max(half_line_size as isize - config.gap_size as isize, 0) as usize;
422    let max_after_size = cmp::max(
423        half_line_size as isize
424            - (2 * config.trunc_str.len()) as isize
425            - keyword.len() as isize
426            - 1,
427        0,
428    ) as usize;
429
430    // Allocate plenty space for all the chunks.
431    let mut head = String::with_capacity(half_line_size);
432    let mut before = String::with_capacity(half_line_size);
433    let mut after = String::with_capacity(half_line_size);
434    let mut tail = String::with_capacity(half_line_size);
435
436    // the before chunk
437
438    // trim whitespace away from all_before to get the index where the before chunk should end.
439    let (_, before_end) = trim_idx(all_before, 0, all_before.len());
440
441    // the minimum possible begin index of the before_chunk is the end index minus the length.
442    let before_beg = cmp::max(before_end as isize - max_before_size as isize, 0) as usize;
443    // in case that falls in the middle of a word, trim away the word.
444    let before_beg = trim_broken_word_left(all_before, before_beg, before_end);
445
446    // trim away white space.
447    let (before_beg, before_end) = trim_idx(all_before, before_beg, before_end);
448
449    // and get the string.
450    let before_str: String = all_before[before_beg..before_end].iter().collect();
451    before.push_str(&before_str);
452    assert!(max_before_size >= before.len());
453
454    // the after chunk
455
456    // must be no longer than the minimum between the max size and the total available string.
457    let after_end = cmp::min(max_after_size, all_after.len());
458    // in case that falls in the middle of a word, trim away the word.
459    let after_end = trim_broken_word_right(all_after, 0, after_end);
460
461    // trim away white space.
462    let (_, after_end) = trim_idx(all_after, 0, after_end);
463
464    // and get the string
465    let after_str: String = all_after[0..after_end].iter().collect();
466    after.push_str(&after_str);
467    assert!(max_after_size >= after.len());
468
469    // the tail chunk
470
471    // max size of the tail chunk = max size of left half - space taken by before chunk - gap size.
472    let max_tail_size = cmp::max(
473        max_before_size as isize - before.len() as isize - config.gap_size as isize,
474        0,
475    ) as usize;
476
477    // the tail chunk takes text starting from where the after chunk ends (with whitespace trimmed).
478    let (tail_beg, _) = trim_idx(all_after, after_end, all_after.len());
479
480    // end = begin + max length
481    let tail_end = cmp::min(all_after.len(), tail_beg + max_tail_size);
482    // in case that falls in the middle of a word, trim away the word.
483    let tail_end = trim_broken_word_right(all_after, tail_beg, tail_end);
484
485    // trim away whitespace again.
486    let (tail_beg, mut tail_end) = trim_idx(all_after, tail_beg, tail_end);
487    // Fix: Manually trim trailing char (like "a") that are preceded by a space.
488    // This handles cases like "is a" which are not correctly trimmed by the
489    // preceding functions.
490    if tail_end >= 2
491        && (tail_end - 2) > tail_beg
492        && all_after[tail_end - 2].is_whitespace()
493        && !all_after[tail_end - 1].is_whitespace()
494    {
495        tail_end -= 1;
496        (_, tail_end) = trim_idx(all_after, tail_beg, tail_end);
497    }
498
499    // and get the string
500    let tail_str: String = all_after[tail_beg..tail_end].iter().collect();
501    tail.push_str(&tail_str);
502
503    // the head chunk
504
505    // max size of the head chunk = max size of right half - space taken by after chunk - gap size.
506    let max_head_size = cmp::max(
507        max_after_size as isize - after.len() as isize - config.gap_size as isize,
508        0,
509    ) as usize;
510
511    // the head chunk takes text from before the before chunk
512    let (_, head_end) = trim_idx(all_before, 0, before_beg);
513
514    // begin = end - max length
515    let head_beg = cmp::max(head_end as isize - max_head_size as isize, 0) as usize;
516    // in case that falls in the middle of a word, trim away the word.
517    let head_beg = trim_broken_word_left(all_before, head_beg, head_end);
518
519    // trim away white space again.
520    let (head_beg, head_end) = trim_idx(all_before, head_beg, head_end);
521
522    // and get the string.
523    let head_str: String = all_before[head_beg..head_end].iter().collect();
524    head.push_str(&head_str);
525    //The TeX mode does not output truncation characters.
526    if config.format != OutFormat::Tex {
527        // put right context truncation string if needed
528        if after_end != all_after.len() && tail_beg == tail_end {
529            after.push_str(&config.trunc_str);
530        } else if after_end != all_after.len() && tail_end != all_after.len() {
531            tail.push_str(&config.trunc_str);
532        }
533
534        // put left context truncation string if needed
535        if before_beg != 0 && head_beg == head_end {
536            before = format!("{}{before}", config.trunc_str);
537        } else if before_beg != 0 && head_beg != 0 {
538            head = format!("{}{head}", config.trunc_str);
539        }
540    }
541
542    (tail, before, after, head)
543}
544
/// Maps one character to its TeX-safe representation.
///
/// * `\` becomes `\backslash{}`,
/// * `{` and `}` are escaped and wrapped in math mode (`$\{$`),
/// * `$`, `%`, `#`, `&` and `_` get a leading backslash,
/// * every other character is returned unchanged.
fn tex_mapper(x: char) -> String {
    if x == '\\' {
        return String::from("\\backslash{}");
    }
    if matches!(x, '{' | '}') {
        return format!("$\\{x}$");
    }
    if matches!(x, '$' | '%' | '#' | '&' | '_') {
        return format!("\\{x}");
    }
    String::from(x)
}
553
554/// Escape special characters for TeX.
555fn format_tex_field(s: &str) -> String {
556    let mapped_chunks: Vec<String> = s.chars().map(tex_mapper).collect();
557    mapped_chunks.join("")
558}
559
560fn format_tex_line(
561    config: &Config,
562    word_ref: &WordRef,
563    line: &str,
564    chars_line: &[char],
565    reference: &str,
566) -> String {
567    let mut output = String::new();
568    write!(output, "\\{} ", config.macro_name).unwrap();
569    let (tail, before, keyword, after, head) =
570        prepare_line_chunks(config, word_ref, line, chars_line, reference);
571    write!(
572        output,
573        "{{{0}}}{{{1}}}{{{2}}}{{{3}}}{{{4}}}",
574        format_tex_field(&tail),
575        format_tex_field(&before),
576        format_tex_field(&keyword),
577        format_tex_field(&after),
578        format_tex_field(&head),
579    )
580    .unwrap();
581    if config.auto_ref || config.input_ref {
582        write!(output, "{{{}}}", format_tex_field(reference)).unwrap();
583    }
584    output
585}
586
587fn format_dumb_line(
588    config: &Config,
589    word_ref: &WordRef,
590    line: &str,
591    chars_line: &[char],
592    reference: &str,
593) -> String {
594    let (tail, before, keyword, after, head) =
595        prepare_line_chunks(config, word_ref, line, chars_line, reference);
596
597    // Calculate the position for the left part
598    // The left part consists of tail (if present) + space + before
599    let left_part = if tail.is_empty() {
600        before
601    } else if before.is_empty() {
602        tail
603    } else {
604        format!("{tail} {before}")
605    };
606
607    // Calculate the position for the right part
608    let right_part = if head.is_empty() {
609        after
610    } else if after.is_empty() {
611        head
612    } else {
613        format!("{after} {head}")
614    };
615
616    // Calculate the width for the left half (before the keyword)
617    let half_width = config.line_width / 2;
618
619    // Right-justify the left part within the left half
620    let padding = if left_part.len() < half_width {
621        half_width - left_part.len()
622    } else {
623        0
624    };
625
626    // Build the output line with padding, left part, gap, keyword, and right part
627    let mut output = String::new();
628    output.push_str(&" ".repeat(padding));
629    output.push_str(&left_part);
630
631    // Add gap before keyword
632    output.push_str(&" ".repeat(config.gap_size));
633
634    output.push_str(&keyword);
635    output.push_str(&right_part);
636
637    // Add reference if needed
638    if config.auto_ref || config.input_ref {
639        if config.right_ref {
640            output.push(' ');
641            output.push_str(reference);
642        } else {
643            output = format!("{reference} {output}");
644        }
645    }
646
647    output
648}
649
/// Escapes a field for use inside a double-quoted roff macro argument by
/// doubling every `"` character.
fn format_roff_field(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '"' {
            escaped.push_str("\"\"");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
653
654fn format_roff_line(
655    config: &Config,
656    word_ref: &WordRef,
657    line: &str,
658    chars_line: &[char],
659    reference: &str,
660) -> String {
661    let mut output = String::new();
662    write!(output, ".{}", config.macro_name).unwrap();
663    let (tail, before, keyword, after, head) =
664        prepare_line_chunks(config, word_ref, line, chars_line, reference);
665    write!(
666        output,
667        " \"{}\" \"{}\" \"{}{}\" \"{}\"",
668        format_roff_field(&tail),
669        format_roff_field(&before),
670        format_roff_field(&keyword),
671        format_roff_field(&after),
672        format_roff_field(&head)
673    )
674    .unwrap();
675    if config.auto_ref || config.input_ref {
676        write!(output, " \"{}\"", format_roff_field(reference)).unwrap();
677    }
678    output
679}
680
681/// Extract and prepare text chunks for formatting in both TeX and roff output
682fn prepare_line_chunks(
683    config: &Config,
684    word_ref: &WordRef,
685    line: &str,
686    chars_line: &[char],
687    reference: &str,
688) -> (String, String, String, String, String) {
689    // Convert byte positions to character positions
690    let ref_char_position = line[..word_ref.position].chars().count();
691    let char_position_end = ref_char_position
692        + line[word_ref.position..word_ref.position_end]
693            .chars()
694            .count();
695
696    // Extract the text before the keyword
697    let all_before = if config.input_ref {
698        let before = &line[..word_ref.position];
699        let before_char_count = before.chars().count();
700        let trimmed_char_count = before
701            .trim_start_matches(reference)
702            .trim_start()
703            .chars()
704            .count();
705        let trim_offset = before_char_count - trimmed_char_count;
706        &chars_line[trim_offset..before_char_count]
707    } else {
708        &chars_line[..ref_char_position]
709    };
710
711    // Extract the keyword and text after it
712    let keyword = line[word_ref.position..word_ref.position_end].to_string();
713    let all_after = &chars_line[char_position_end..];
714
715    // Get formatted output chunks
716    let (tail, before, after, head) = get_output_chunks(all_before, &keyword, all_after, config);
717
718    (tail, before, keyword, after, head)
719}
720
721fn write_traditional_output(
722    config: &mut Config,
723    file_map: &FileMap,
724    words: &BTreeSet<WordRef>,
725    output_filename: &OsStr,
726) -> UResult<()> {
727    let mut writer: BufWriter<Box<dyn Write>> =
728        BufWriter::new(if output_filename == OsStr::new("-") {
729            Box::new(stdout())
730        } else {
731            let file = File::create(output_filename)
732                .map_err_context(|| output_filename.to_string_lossy().quote().to_string())?;
733            Box::new(file)
734        });
735
736    let context_reg = Regex::new(&config.context_regex).unwrap();
737
738    if !config.right_ref {
739        let max_ref_len = if config.auto_ref {
740            get_auto_max_reference_len(words)
741        } else {
742            0
743        };
744        config.line_width -= max_ref_len;
745    }
746
747    for word_ref in words {
748        let file_map_value: &FileContent = file_map
749            .get(&word_ref.filename)
750            .expect("Missing file in file map");
751        let FileContent {
752            ref lines,
753            ref chars_lines,
754            offset: _,
755        } = *(file_map_value);
756        let reference = get_reference(
757            config,
758            word_ref,
759            &lines[word_ref.local_line_nr],
760            &context_reg,
761        );
762        let output_line: String = match config.format {
763            OutFormat::Tex => format_tex_line(
764                config,
765                word_ref,
766                &lines[word_ref.local_line_nr],
767                &chars_lines[word_ref.local_line_nr],
768                &reference,
769            ),
770            OutFormat::Roff => format_roff_line(
771                config,
772                word_ref,
773                &lines[word_ref.local_line_nr],
774                &chars_lines[word_ref.local_line_nr],
775                &reference,
776            ),
777            OutFormat::Dumb => format_dumb_line(
778                config,
779                word_ref,
780                &lines[word_ref.local_line_nr],
781                &chars_lines[word_ref.local_line_nr],
782                &reference,
783            ),
784        };
785        writeln!(writer, "{output_line}")
786            .map_err_context(|| translate!("ptx-error-write-failed"))?;
787    }
788
789    writer
790        .flush()
791        .map_err_context(|| translate!("ptx-error-write-failed"))?;
792
793    Ok(())
794}
795
796fn get_auto_max_reference_len(words: &BTreeSet<WordRef>) -> usize {
797    //Get the maximum length of the reference field
798    let line_num = words
799        .iter()
800        .map(|w| {
801            if w.local_line_nr == 0 {
802                1
803            } else {
804                (w.local_line_nr as f64).log10() as usize + 1
805            }
806        })
807        .max()
808        .unwrap_or(0);
809
810    let filename_len = words
811        .iter()
812        .filter(|w| w.filename != "-")
813        .map(|w| w.filename.maybe_quote().to_string().len())
814        .max()
815        .unwrap_or(0);
816
817    // +1 for the colon
818    line_num + filename_len + 1
819}
820
/// Identifiers of the command-line arguments, shared between the argument
/// definitions and the code that reads the parsed matches.
mod options {
    /// Flags that select an output format directly.
    pub mod format {
        pub static ROFF: &str = "roff";
        pub static TEX: &str = "tex";
    }

    // Positional operands.
    pub static FILE: &str = "file";

    // Output format and layout.
    pub static FORMAT: &str = "format";
    pub static TRADITIONAL: &str = "traditional";
    pub static MACRO_NAME: &str = "macro-name";
    pub static FLAG_TRUNCATION: &str = "flag-truncation";
    pub static WIDTH: &str = "width";
    pub static GAP_SIZE: &str = "gap-size";

    // References.
    pub static AUTO_REFERENCE: &str = "auto-reference";
    pub static REFERENCES: &str = "references";
    pub static RIGHT_SIDE_REFS: &str = "right-side-refs";

    // Keyword selection.
    pub static SENTENCE_REGEXP: &str = "sentence-regexp";
    pub static WORD_REGEXP: &str = "word-regexp";
    pub static BREAK_FILE: &str = "break-file";
    pub static IGNORE_CASE: &str = "ignore-case";
    pub static IGNORE_FILE: &str = "ignore-file";
    pub static ONLY_FILE: &str = "only-file";
}
844
845#[uucore::main]
846pub fn uumain(args: impl uucore::Args) -> UResult<()> {
847    let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;
848    let mut config = get_config(&matches)?;
849
850    let input_files;
851    let output_file: OsString;
852
853    let mut files = matches
854        .get_many::<OsString>(options::FILE)
855        .into_iter()
856        .flatten()
857        .cloned();
858
859    if config.gnu_ext {
860        input_files = {
861            let mut files = files.collect::<Vec<_>>();
862            if files.is_empty() {
863                files.push(OsString::from("-"));
864            }
865            files
866        };
867        output_file = OsString::from("-");
868    } else {
869        input_files = vec![files.next().unwrap_or(OsString::from("-"))];
870        output_file = files.next().unwrap_or(OsString::from("-"));
871        if let Some(file) = files.next() {
872            return Err(UUsageError::new(
873                1,
874                translate!("ptx-error-extra-operand", "operand" => file.to_string_lossy().quote()),
875            ));
876        }
877    }
878
879    let word_filter = WordFilter::new(&matches, &config)?;
880    let file_map = read_input(&input_files).map_err_context(String::new)?;
881    let word_set = create_word_set(&config, &word_filter, &file_map);
882    write_traditional_output(&mut config, &file_map, &word_set, &output_file)
883}
884
885pub fn uu_app() -> Command {
886    Command::new(uucore::util_name())
887        .about(translate!("ptx-about"))
888        .version(uucore::crate_version!())
889        .help_template(uucore::localized_help_template(uucore::util_name()))
890        .override_usage(format_usage(&translate!("ptx-usage")))
891        .infer_long_args(true)
892        .arg(
893            Arg::new(options::FILE)
894                .hide(true)
895                .action(ArgAction::Append)
896                .value_hint(clap::ValueHint::FilePath)
897                .value_parser(clap::value_parser!(OsString)),
898        )
899        .arg(
900            Arg::new(options::AUTO_REFERENCE)
901                .short('A')
902                .long(options::AUTO_REFERENCE)
903                .help(translate!("ptx-help-auto-reference"))
904                .action(ArgAction::SetTrue),
905        )
906        .arg(
907            Arg::new(options::TRADITIONAL)
908                .short('G')
909                .long(options::TRADITIONAL)
910                .help(translate!("ptx-help-traditional"))
911                .action(ArgAction::SetTrue),
912        )
913        .arg(
914            Arg::new(options::FLAG_TRUNCATION)
915                .short('F')
916                .long(options::FLAG_TRUNCATION)
917                .help(translate!("ptx-help-flag-truncation"))
918                .value_name("STRING"),
919        )
920        .arg(
921            Arg::new(options::MACRO_NAME)
922                .short('M')
923                .long(options::MACRO_NAME)
924                .help(translate!("ptx-help-macro-name"))
925                .value_name("STRING"),
926        )
927        .arg(
928            Arg::new(options::FORMAT)
929                .long(options::FORMAT)
930                .hide(true)
931                .value_parser(["roff", "tex"])
932                .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]),
933        )
934        .arg(
935            Arg::new(options::format::ROFF)
936                .short('O')
937                .help(translate!("ptx-help-roff"))
938                .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
939                .action(ArgAction::SetTrue),
940        )
941        .arg(
942            Arg::new(options::format::TEX)
943                .short('T')
944                .help(translate!("ptx-help-tex"))
945                .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
946                .action(ArgAction::SetTrue),
947        )
948        .arg(
949            Arg::new(options::RIGHT_SIDE_REFS)
950                .short('R')
951                .long(options::RIGHT_SIDE_REFS)
952                .help(translate!("ptx-help-right-side-refs"))
953                .action(ArgAction::SetTrue),
954        )
955        .arg(
956            Arg::new(options::SENTENCE_REGEXP)
957                .short('S')
958                .long(options::SENTENCE_REGEXP)
959                .help(translate!("ptx-help-sentence-regexp"))
960                .value_name("REGEXP"),
961        )
962        .arg(
963            Arg::new(options::WORD_REGEXP)
964                .short('W')
965                .long(options::WORD_REGEXP)
966                .help(translate!("ptx-help-word-regexp"))
967                .value_name("REGEXP"),
968        )
969        .arg(
970            Arg::new(options::BREAK_FILE)
971                .short('b')
972                .long(options::BREAK_FILE)
973                .help(translate!("ptx-help-break-file"))
974                .value_name("FILE")
975                .value_hint(clap::ValueHint::FilePath)
976                .value_parser(clap::value_parser!(OsString)),
977        )
978        .arg(
979            Arg::new(options::IGNORE_CASE)
980                .short('f')
981                .long(options::IGNORE_CASE)
982                .help(translate!("ptx-help-ignore-case"))
983                .action(ArgAction::SetTrue),
984        )
985        .arg(
986            Arg::new(options::GAP_SIZE)
987                .short('g')
988                .long(options::GAP_SIZE)
989                .help(translate!("ptx-help-gap-size"))
990                .value_name("NUMBER"),
991        )
992        .arg(
993            Arg::new(options::IGNORE_FILE)
994                .short('i')
995                .long(options::IGNORE_FILE)
996                .help(translate!("ptx-help-ignore-file"))
997                .value_name("FILE")
998                .value_hint(clap::ValueHint::FilePath)
999                .value_parser(clap::value_parser!(OsString)),
1000        )
1001        .arg(
1002            Arg::new(options::ONLY_FILE)
1003                .short('o')
1004                .long(options::ONLY_FILE)
1005                .help(translate!("ptx-help-only-file"))
1006                .value_name("FILE")
1007                .value_hint(clap::ValueHint::FilePath)
1008                .value_parser(clap::value_parser!(OsString)),
1009        )
1010        .arg(
1011            Arg::new(options::REFERENCES)
1012                .short('r')
1013                .long(options::REFERENCES)
1014                .help(translate!("ptx-help-references"))
1015                .value_name("FILE")
1016                .action(ArgAction::SetTrue),
1017        )
1018        .arg(
1019            Arg::new(options::WIDTH)
1020                .short('w')
1021                .long(options::WIDTH)
1022                .help(translate!("ptx-help-width"))
1023                .value_name("NUMBER"),
1024        )
1025}