1use std::collections::HashSet;
2use std::io::{self, BufRead, Write};
3
/// Output dialect for the generated permuted index.
///
/// Fieldless, so it is cheap to copy; `Copy`/`Eq` are derived in addition
/// to the original `Clone`/`PartialEq` (backward compatible).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OutputFormat {
    /// roff macro lines (`.xx "..." ...`), see `write_roff`.
    Roff,
    /// TeX macro lines (`\xx {...}...`), see `write_tex`.
    Tex,
    /// Aligned plain-text columns; the default (see `PtxConfig::default`).
    Plain,
}
14
/// Options controlling keyword selection, sorting, and output formatting.
#[derive(Clone, Debug)]
pub struct PtxConfig {
    /// Total output line width in columns (default 72).
    pub width: usize,
    /// Fold keywords to lowercase for filtering and sort order.
    pub ignore_case: bool,
    /// Generate "file:line" (or bare "line") references automatically.
    pub auto_reference: bool,
    /// NOTE(review): not read anywhere in this module — presumably handled
    /// by the caller; confirm before relying on it.
    pub traditional: bool,
    /// Output dialect (plain text, roff, or TeX).
    pub format: OutputFormat,
    /// Words never used as keywords.
    pub ignore_words: HashSet<String>,
    /// When set, ONLY these words become keywords (takes precedence over
    /// `ignore_words` — see `should_index`).
    pub only_words: Option<HashSet<String>>,
    /// Emit stored references alongside entries.
    pub references: bool,
    /// Spacing in columns between output fields (default 3).
    pub gap_size: usize,
    /// Print the reference at the right end of the line instead of the left.
    pub right_reference: bool,
    /// NOTE(review): accepted but unused in this module — confirm intended.
    pub sentence_regexp: Option<String>,
    /// NOTE(review): accepted but unused in this module — confirm intended.
    pub word_regexp: Option<String>,
    /// Truncation marker string; "/" when None (see `write_plain`).
    pub flag_truncation: Option<String>,
    /// Macro name for roff/TeX output; "xx" when None.
    pub macro_name: Option<String>,
}
33
34impl Default for PtxConfig {
35 fn default() -> Self {
36 Self {
37 width: 72,
38 ignore_case: false,
39 auto_reference: false,
40 traditional: false,
41 format: OutputFormat::Plain,
42 ignore_words: HashSet::new(),
43 only_words: None,
44 references: false,
45 gap_size: 3,
46 right_reference: false,
47 sentence_regexp: None,
48 word_regexp: None,
49 flag_truncation: None,
50 macro_name: None,
51 }
52 }
53}
54
/// Case-folded copies of the ignore/only word sets, built once per run so
/// per-word checks don't re-lowercase the sets. Only populated when
/// `ignore_case` is set; otherwise left empty/None and `should_index`
/// consults the config's original sets directly.
struct NormalizedSets {
    ignore_lower: HashSet<String>,
    only_lower: Option<HashSet<String>>,
}
60
61impl NormalizedSets {
62 fn new(config: &PtxConfig) -> Self {
63 if config.ignore_case {
64 let ignore_lower = config
65 .ignore_words
66 .iter()
67 .map(|w| w.to_lowercase())
68 .collect();
69 let only_lower = config
70 .only_words
71 .as_ref()
72 .map(|s| s.iter().map(|w| w.to_lowercase()).collect());
73 Self {
74 ignore_lower,
75 only_lower,
76 }
77 } else {
78 Self {
80 ignore_lower: HashSet::new(),
81 only_lower: None,
82 }
83 }
84 }
85}
86
/// One keyword occurrence, stored compactly as indices into the context
/// list instead of owned strings (u32/u16 keep the entry small for large
/// inputs).
struct KwicEntry {
    /// Index into the `(reference, text)` context slice.
    line_idx: u32,
    /// Byte offset of the keyword within the context text.
    word_start: u32,
    /// Keyword length in bytes (guarded by a debug_assert in
    /// `generate_entries`).
    word_len: u16,
}
93
/// Borrowed slices of one context sentence arranged for output, plus a
/// truncation flag per printed side. `keyafter` is the keyword together
/// with its trailing context; `keyword`/`after` are the same span split in
/// two (used by the TeX writer). `tail`/`head` hold overflow context
/// wrapped onto the opposite half-line (see `compute_layout`).
struct LayoutFields<'a> {
    /// Context following `keyafter`, printed in the left half's slack.
    tail: &'a str,
    /// Context immediately left of the keyword.
    before: &'a str,
    /// Keyword plus trailing context fitting the right half.
    keyafter: &'a str,
    /// The keyword alone.
    keyword: &'a str,
    /// The part of `keyafter` following the keyword.
    after: &'a str,
    /// Context preceding `before`, printed in the right half's slack.
    head: &'a str,
    tail_truncated: bool,
    before_truncated: bool,
    keyafter_truncated: bool,
    head_truncated: bool,
}
107
/// 256-byte buffer of spaces used to emit padding in chunks.
///
/// Built with an array-repeat expression rather than a literal byte string:
/// the original `b" "` literal did not match the declared `[u8; 256]`
/// length, and the repeat form cannot drift out of sync.
const SPACES: [u8; 256] = [b' '; 256];

/// Write exactly `n` space characters to `out`, at most 256 bytes per
/// `write_all` call. Writing zero spaces is a no-op.
#[inline]
fn write_spaces<W: Write>(out: &mut W, n: usize) -> io::Result<()> {
    let mut remaining = n;
    while remaining > 0 {
        let chunk = remaining.min(SPACES.len());
        out.write_all(&SPACES[..chunk])?;
        remaining -= chunk;
    }
    Ok(())
}
122
/// Scan `line` for words and return `(byte_offset, word)` pairs.
///
/// A word starts with an ASCII letter and continues through ASCII
/// alphanumerics; every other byte is a separator. Offsets are byte
/// positions into `line`.
fn extract_words(line: &str) -> Vec<(usize, &str)> {
    let bytes = line.as_bytes();
    let mut words = Vec::new();
    let mut pos = 0;

    // Jump to the next letter; the word then extends over alphanumerics.
    while let Some(rel) = bytes[pos..].iter().position(|b| b.is_ascii_alphabetic()) {
        let begin = pos + rel;
        let end = begin
            + 1
            + bytes[begin + 1..]
                .iter()
                .take_while(|b| b.is_ascii_alphanumeric())
                .count();
        words.push((begin, &line[begin..end]));
        pos = end;
    }

    words
}
147
148#[inline]
150fn should_index(word: &str, config: &PtxConfig, norm: &NormalizedSets) -> bool {
151 if config.ignore_case {
152 if let Some(ref only) = norm.only_lower {
154 let lower = word.to_ascii_lowercase();
156 return only.contains(lower.as_str());
157 }
158 let lower = word.to_ascii_lowercase();
159 !norm.ignore_lower.contains(lower.as_str())
160 } else {
161 if let Some(ref only) = config.only_words {
162 return only.contains(word);
163 }
164 !config.ignore_words.contains(word)
165 }
166}
167
168fn generate_entries(
170 lines: &[(String, String)],
171 config: &PtxConfig,
172 norm: &NormalizedSets,
173) -> (Vec<KwicEntry>, usize) {
174 let mut entries = Vec::new();
175 let mut max_word_length: usize = 0;
176
177 for (line_idx, (_reference, line)) in lines.iter().enumerate() {
178 let words = extract_words(line);
179
180 for &(word_start, word) in &words {
181 let wlen = word.len();
182 if wlen > max_word_length {
183 max_word_length = wlen;
184 }
185
186 if !should_index(word, config, norm) {
187 continue;
188 }
189
190 debug_assert!(
191 wlen <= u16::MAX as usize,
192 "word length {} exceeds u16::MAX",
193 wlen
194 );
195 entries.push(KwicEntry {
196 line_idx: line_idx as u32,
197 word_start: word_start as u32,
198 word_len: wlen as u16,
199 });
200 }
201 }
202
203 if config.ignore_case {
207 entries.sort_by(|a, b| {
208 let a_line = &lines[a.line_idx as usize].1;
209 let b_line = &lines[b.line_idx as usize].1;
210 let a_kw = &a_line[a.word_start as usize..a.word_start as usize + a.word_len as usize];
211 let b_kw = &b_line[b.word_start as usize..b.word_start as usize + b.word_len as usize];
212 a_kw.bytes()
213 .map(|c| c.to_ascii_lowercase())
214 .cmp(b_kw.bytes().map(|c| c.to_ascii_lowercase()))
215 .then_with(|| {
216 lines[a.line_idx as usize]
217 .0
218 .cmp(&lines[b.line_idx as usize].0)
219 })
220 });
221 } else {
222 entries.sort_by(|a, b| {
223 let a_line = &lines[a.line_idx as usize].1;
224 let b_line = &lines[b.line_idx as usize].1;
225 let a_kw = &a_line[a.word_start as usize..a.word_start as usize + a.word_len as usize];
226 let b_kw = &b_line[b.word_start as usize..b.word_start as usize + b.word_len as usize];
227 a_kw.cmp(b_kw).then_with(|| {
228 lines[a.line_idx as usize]
229 .0
230 .cmp(&lines[b.line_idx as usize].0)
231 })
232 });
233 }
234
235 (entries, max_word_length)
236}
237
/// Advance `pos` past one token: a whole alphanumeric word when `pos` sits
/// on an ASCII letter, otherwise a single byte. A position at or past the
/// end is returned unchanged.
#[inline]
fn skip_something(s: &str, pos: usize) -> usize {
    let bytes = s.as_bytes();
    match bytes.get(pos) {
        None => pos,
        Some(b) if b.is_ascii_alphabetic() => {
            let mut end = pos + 1;
            while bytes.get(end).map_or(false, |c| c.is_ascii_alphanumeric()) {
                end += 1;
            }
            end
        }
        Some(_) => pos + 1,
    }
}
255
/// Advance `pos` forward over ASCII whitespace; out-of-range positions are
/// returned unchanged.
#[inline]
fn skip_white(s: &str, pos: usize) -> usize {
    let bytes = s.as_bytes();
    let mut cursor = pos;
    while bytes.get(cursor).map_or(false, |b| b.is_ascii_whitespace()) {
        cursor += 1;
    }
    cursor
}
266
/// Move `pos` backwards over ASCII whitespace, never retreating past
/// `start`. Returns the first position whose preceding byte is non-space
/// (or `start`).
#[inline]
fn skip_white_backwards(s: &str, pos: usize, start: usize) -> usize {
    let bytes = s.as_bytes();
    let mut cursor = pos;
    while cursor > start {
        if !bytes[cursor - 1].is_ascii_whitespace() {
            break;
        }
        cursor -= 1;
    }
    cursor
}
277
/// Slice `sentence` into the display fields for one keyword occurrence
/// (`tail`, `before`, `keyafter`/`keyword`+`after`, `head`) together with
/// per-side truncation flags.
///
/// The configured width is split into two half-lines after reserving a
/// left-hand reference column (unless the reference is empty or printed on
/// the right). All positions are byte offsets; field boundaries are snapped
/// to token edges via `skip_something` and trimmed of whitespace.
fn compute_layout<'a>(
    sentence: &'a str,
    word_start: usize,
    keyword_len: usize,
    ref_str: &str,
    config: &PtxConfig,
    max_word_length: usize,
    ref_max_width: usize,
) -> LayoutFields<'a> {
    let total_width = config.width;
    let gap = config.gap_size;
    // NOTE(review): the truncation marker is assumed to be exactly one
    // column wide here, while write_plain measures the actual configured
    // flag string — confirm alignment when flag_truncation has len > 1.
    let trunc_len = 1; let ref_width = if ref_str.is_empty() || config.right_reference {
        0
    } else {
        ref_max_width + gap
    };

    // Columns left for the KWIC text once the reference column is reserved.
    let line_width = if total_width > ref_width {
        total_width - ref_width
    } else {
        total_width
    };

    let half_line_width = line_width / 2;

    // Budget for the left ("before") field: half line minus the inter-field
    // gap and two truncation markers; clamps to 0 on very narrow widths.
    let before_max_width = if half_line_width > gap + 2 * trunc_len {
        half_line_width - gap - 2 * trunc_len
    } else {
        0
    };
    // Budget for the right field (keyword plus following context).
    let keyafter_max_width = if half_line_width > 2 * trunc_len {
        half_line_width - 2 * trunc_len
    } else {
        0
    };

    let line_len = sentence.len();

    // --- keyafter: the keyword plus as much trailing context as fits ---
    let keyafter_start = word_start;
    let mut keyafter_end = word_start + keyword_len;
    {
        // Grow one token at a time; keyafter_end lags cursor so a token
        // that would overshoot the budget is excluded.
        let mut cursor = keyafter_end;
        while cursor < line_len && cursor <= keyafter_start + keyafter_max_width {
            keyafter_end = cursor;
            cursor = skip_something(sentence, cursor);
        }
        if cursor <= keyafter_start + keyafter_max_width {
            keyafter_end = cursor;
        }
    }
    // Truncated on the right iff part of the sentence did not fit.
    let mut keyafter_truncation = keyafter_end < line_len;
    keyafter_end = skip_white_backwards(sentence, keyafter_end, keyafter_start);

    // --- left context window ---
    let left_context_start: usize = 0;
    // Start the left context at most half a line (plus the widest word)
    // before the keyword, snapped forward to a token boundary.
    let left_field_start = if word_start > half_line_width + max_word_length {
        let lfs = word_start - (half_line_width + max_word_length);
        skip_something(sentence, lfs)
    } else {
        left_context_start
    };

    // --- before: context immediately left of the keyword ---
    let mut before_start: usize = left_field_start;
    let mut before_end = keyafter_start;
    before_end = skip_white_backwards(sentence, before_end, before_start);

    // Drop whole tokens from the left until the field fits its budget.
    while before_start + before_max_width < before_end {
        before_start = skip_something(sentence, before_start);
    }

    // Truncated on the left iff non-space text precedes before_start.
    let mut before_truncation = {
        let cursor = skip_white_backwards(sentence, before_start, 0);
        cursor > left_context_start
    };

    before_start = skip_white(sentence, before_start);
    let before_len = if before_end > before_start {
        before_end - before_start
    } else {
        0
    };

    // --- tail: context after keyafter, wrapped into slack left of "before" ---
    let tail_max_width_raw: isize = before_max_width as isize - before_len as isize - gap as isize;
    let mut tail_start: usize = 0;
    let mut tail_end: usize = 0;
    let mut tail_truncation = false;
    let mut has_tail = false;

    if tail_max_width_raw > 0 {
        let tail_max_width = tail_max_width_raw as usize;
        tail_start = skip_white(sentence, keyafter_end);
        tail_end = tail_start;
        let mut cursor = tail_end;
        while cursor < line_len && cursor < tail_start + tail_max_width {
            tail_end = cursor;
            cursor = skip_something(sentence, cursor);
        }
        if cursor < tail_start + tail_max_width {
            tail_end = cursor;
        }

        if tail_end > tail_start {
            has_tail = true;
            // The tail continues the right-hand text, so the truncation
            // marker moves from keyafter to the tail.
            keyafter_truncation = false;
            tail_truncation = tail_end < line_len;
        } else {
            tail_truncation = false;
        }

        tail_end = skip_white_backwards(sentence, tail_end, tail_start);
    }

    // --- head: context before "before", wrapped into slack right of keyafter ---
    let keyafter_len = if keyafter_end > keyafter_start {
        keyafter_end - keyafter_start
    } else {
        0
    };
    let head_max_width_raw: isize =
        keyafter_max_width as isize - keyafter_len as isize - gap as isize;
    let mut head_start: usize = 0;
    let mut head_end: usize = 0;
    let mut head_truncation = false;
    let mut has_head = false;

    if head_max_width_raw > 0 {
        let head_max_width = head_max_width_raw as usize;
        head_end = skip_white_backwards(sentence, before_start, 0);

        head_start = left_field_start;
        while head_start + head_max_width < head_end {
            head_start = skip_something(sentence, head_start);
        }

        if head_end > head_start {
            has_head = true;
            // As with tail/keyafter: the head absorbs the left marker.
            before_truncation = false;
            head_truncation = {
                let cursor = skip_white_backwards(sentence, head_start, 0);
                cursor > left_context_start
            };
        } else {
            head_truncation = false;
        }

        if head_end > head_start {
            head_start = skip_white(sentence, head_start);
        }
    }

    // Materialize the field slices (empty string for empty ranges).
    let before_text = if before_len > 0 {
        &sentence[before_start..before_end]
    } else {
        ""
    };
    let keyafter_text = if keyafter_end > keyafter_start {
        &sentence[keyafter_start..keyafter_end]
    } else {
        ""
    };
    let tail_text = if has_tail && tail_end > tail_start {
        &sentence[tail_start..tail_end]
    } else {
        ""
    };
    let head_text = if has_head && head_end > head_start {
        &sentence[head_start..head_end]
    } else {
        ""
    };

    // keyword/after split keyafter for the TeX writer, which formats the
    // keyword separately from its trailing context.
    let keyword_text = &sentence[word_start..word_start + keyword_len];
    let after_start = word_start + keyword_len;
    let after_text = if keyafter_end > after_start {
        &sentence[after_start..keyafter_end]
    } else {
        ""
    };

    LayoutFields {
        tail: tail_text,
        before: before_text,
        keyafter: keyafter_text,
        keyword: keyword_text,
        after: after_text,
        head: head_text,
        tail_truncated: tail_truncation,
        before_truncated: before_truncation,
        keyafter_truncated: keyafter_truncation,
        head_truncated: head_truncation,
    }
}
477
/// Write one entry in plain-text format: an optional left reference
/// column, then `tail  before  keyafter  head` padded so the keyword
/// column lines up across entries, then an optional right-hand reference.
///
/// The width arithmetic mirrors `compute_layout`'s budgets; all widths are
/// measured in bytes.
fn write_plain<W: Write>(
    out: &mut W,
    ref_str: &str,
    config: &PtxConfig,
    layout: &LayoutFields<'_>,
    ref_max_width: usize,
) -> io::Result<()> {
    let total_width = config.width;
    let gap = config.gap_size;
    // Truncation marker, "/" unless configured otherwise.
    let trunc_str = config.flag_truncation.as_deref().unwrap_or("/");
    let trunc_len = trunc_str.len();

    // Same left-reference reservation as compute_layout.
    let ref_width = if ref_str.is_empty() || config.right_reference {
        0
    } else {
        ref_max_width + gap
    };

    let line_width = if total_width > ref_width {
        total_width - ref_width
    } else {
        total_width
    };

    let half_line_width = line_width / 2;

    // Marker widths only count when the corresponding side was truncated.
    let before_trunc_len = if layout.before_truncated {
        trunc_len
    } else {
        0
    };
    let keyafter_trunc_len = if layout.keyafter_truncated {
        trunc_len
    } else {
        0
    };
    let tail_trunc_len = if layout.tail_truncated { trunc_len } else { 0 };
    let head_trunc_len = if layout.head_truncated { trunc_len } else { 0 };

    // Left-hand reference column; auto references get a ":" suffix.
    if !config.right_reference {
        if !ref_str.is_empty() && config.auto_reference {
            out.write_all(ref_str.as_bytes())?;
            out.write_all(b":")?;
            let ref_total = ref_str.len() + 1;
            let ref_pad_total = ref_max_width + gap;
            write_spaces(out, ref_pad_total.saturating_sub(ref_total))?;
        } else if !ref_str.is_empty() {
            out.write_all(ref_str.as_bytes())?;
            let ref_pad_total = ref_max_width + gap;
            write_spaces(out, ref_pad_total.saturating_sub(ref_str.len()))?;
        } else {
            write_spaces(out, gap)?;
        }
    }

    // Left half: tail (if any) at the far left, padding, then the
    // right-aligned "before" context.
    if !layout.tail.is_empty() {
        out.write_all(layout.tail.as_bytes())?;
        if layout.tail_truncated {
            out.write_all(trunc_str.as_bytes())?;
        }
        let tail_used = layout.tail.len() + tail_trunc_len;
        let before_used = layout.before.len() + before_trunc_len;
        let padding = half_line_width
            .saturating_sub(gap)
            .saturating_sub(tail_used)
            .saturating_sub(before_used);
        write_spaces(out, padding)?;
    } else {
        let before_used = layout.before.len() + before_trunc_len;
        let padding = half_line_width
            .saturating_sub(gap)
            .saturating_sub(before_used);
        write_spaces(out, padding)?;
    }

    // Left-truncation marker sits in front of the text.
    if layout.before_truncated {
        out.write_all(trunc_str.as_bytes())?;
    }
    out.write_all(layout.before.as_bytes())?;

    // Fixed gap separating the halves; anchors the keyword column.
    write_spaces(out, gap)?;

    out.write_all(layout.keyafter.as_bytes())?;
    if layout.keyafter_truncated {
        out.write_all(trunc_str.as_bytes())?;
    }

    // Right half: pad out to place the wrapped "head", or to right-align
    // the reference when it goes on the right.
    if !layout.head.is_empty() {
        let keyafter_used = layout.keyafter.len() + keyafter_trunc_len;
        let head_used = layout.head.len() + head_trunc_len;
        let padding = half_line_width
            .saturating_sub(keyafter_used)
            .saturating_sub(head_used);
        write_spaces(out, padding)?;
        if layout.head_truncated {
            out.write_all(trunc_str.as_bytes())?;
        }
        out.write_all(layout.head.as_bytes())?;
    } else if !ref_str.is_empty() && config.right_reference {
        let keyafter_used = layout.keyafter.len() + keyafter_trunc_len;
        let padding = half_line_width.saturating_sub(keyafter_used);
        write_spaces(out, padding)?;
    }

    // Right-hand reference, used instead of the left column.
    if !ref_str.is_empty() && config.right_reference {
        write_spaces(out, gap)?;
        out.write_all(ref_str.as_bytes())?;
    }

    out.write_all(b"\n")
}
595
/// Escape backslashes and double quotes for inclusion inside a quoted roff
/// macro argument.
fn escape_roff(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            other => escaped.push(other),
        }
    }
    escaped
}
600
601fn write_roff<W: Write>(
603 out: &mut W,
604 ref_str: &str,
605 config: &PtxConfig,
606 layout: &LayoutFields<'_>,
607 escaped_trunc: &str,
608) -> io::Result<()> {
609 let macro_name = config.macro_name.as_deref().unwrap_or("xx");
610
611 out.write_all(b".")?;
612 out.write_all(macro_name.as_bytes())?;
613
614 out.write_all(b" \"")?;
616 out.write_all(escape_roff(layout.tail).as_bytes())?;
617 if layout.tail_truncated {
618 out.write_all(escaped_trunc.as_bytes())?;
619 }
620
621 out.write_all(b"\" \"")?;
623 if layout.before_truncated {
624 out.write_all(escaped_trunc.as_bytes())?;
625 }
626 out.write_all(escape_roff(layout.before).as_bytes())?;
627
628 out.write_all(b"\" \"")?;
630 out.write_all(escape_roff(layout.keyafter).as_bytes())?;
631 if layout.keyafter_truncated {
632 out.write_all(escaped_trunc.as_bytes())?;
633 }
634
635 out.write_all(b"\" \"")?;
637 if layout.head_truncated {
638 out.write_all(escaped_trunc.as_bytes())?;
639 }
640 out.write_all(escape_roff(layout.head).as_bytes())?;
641 out.write_all(b"\"")?;
642
643 if !ref_str.is_empty() {
645 out.write_all(b" \"")?;
646 out.write_all(escape_roff(ref_str).as_bytes())?;
647 out.write_all(b"\"")?;
648 }
649
650 out.write_all(b"\n")
651}
652
/// Escape TeX special characters so `s` can appear inside a macro argument.
/// Backslash becomes `\backslash `, `^`/`~` take empty-group accents, and
/// the remaining specials get a simple backslash prefix.
fn escape_tex(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\backslash "),
            '^' => out.push_str("\\^{}"),
            '~' => out.push_str("\\~{}"),
            '{' | '}' | '$' | '&' | '#' | '_' | '%' => {
                out.push('\\');
                out.push(ch);
            }
            other => out.push(other),
        }
    }
    out
}
673
674fn write_tex<W: Write>(
676 out: &mut W,
677 ref_str: &str,
678 config: &PtxConfig,
679 layout: &LayoutFields<'_>,
680) -> io::Result<()> {
681 let macro_name = config.macro_name.as_deref().unwrap_or("xx");
682
683 out.write_all(b"\\")?;
684 out.write_all(macro_name.as_bytes())?;
685 out.write_all(b" {")?;
686 out.write_all(escape_tex(layout.tail).as_bytes())?;
687 out.write_all(b"}{")?;
688 out.write_all(escape_tex(layout.before).as_bytes())?;
689 out.write_all(b"}{")?;
690 out.write_all(escape_tex(layout.keyword).as_bytes())?;
691 out.write_all(b"}{")?;
692 out.write_all(escape_tex(layout.after).as_bytes())?;
693 out.write_all(b"}{")?;
694 out.write_all(escape_tex(layout.head).as_bytes())?;
695 out.write_all(b"}")?;
696
697 if !ref_str.is_empty() {
698 out.write_all(b"{")?;
699 out.write_all(escape_tex(ref_str).as_bytes())?;
700 out.write_all(b"}")?;
701 }
702
703 out.write_all(b"\n")
704}
705
706fn process_lines_into_contexts(
708 content: &str,
709 filename: Option<&str>,
710 config: &PtxConfig,
711 lines_out: &mut Vec<(String, String)>,
712 global_line_num: &mut usize,
713) {
714 let mut current_text = String::new();
715 let mut context_ref = String::new();
716 let mut first_line_of_context = true;
717
718 for line in content.lines() {
719 *global_line_num += 1;
720
721 let reference = if config.auto_reference {
722 match filename {
723 Some(name) => format!("{}:{}", name, global_line_num),
724 None => format!("{}", global_line_num),
725 }
726 } else {
727 String::new()
728 };
729
730 if first_line_of_context {
731 context_ref = reference;
732 first_line_of_context = false;
733 }
734
735 if !current_text.is_empty() {
736 current_text.push(' ');
737 }
738 current_text.push_str(line);
739
740 let trimmed = line.trim_end();
741 let ends_with_terminator =
742 trimmed.ends_with('.') || trimmed.ends_with('?') || trimmed.ends_with('!');
743
744 if ends_with_terminator || line.is_empty() {
745 if !current_text.trim().is_empty() {
746 lines_out.push((context_ref.clone(), current_text.clone()));
747 }
748 current_text.clear();
749 first_line_of_context = true;
750 }
751 }
752
753 if !current_text.trim().is_empty() {
754 lines_out.push((context_ref.clone(), current_text.clone()));
755 }
756}
757
758fn format_and_write<W: Write>(
759 lines: &[(String, String)],
760 output: &mut W,
761 config: &PtxConfig,
762) -> io::Result<()> {
763 let norm = NormalizedSets::new(config);
764 let (entries, max_word_length) = generate_entries(lines, config, &norm);
765
766 let ref_max_width = if config.auto_reference || config.references {
768 entries
769 .iter()
770 .map(|e| lines[e.line_idx as usize].0.len())
771 .max()
772 .unwrap_or(0)
773 } else {
774 0
775 };
776
777 let escaped_trunc = if config.format == OutputFormat::Roff {
779 escape_roff(config.flag_truncation.as_deref().unwrap_or("/"))
780 } else {
781 String::new()
782 };
783
784 for entry in &entries {
785 let line_data = &lines[entry.line_idx as usize];
786 let ref_str = if config.auto_reference || config.references {
787 &line_data.0
788 } else {
789 ""
790 };
791 let sentence = &line_data.1;
792 let word_start = entry.word_start as usize;
793 let keyword_len = entry.word_len as usize;
794
795 let layout = compute_layout(
796 sentence,
797 word_start,
798 keyword_len,
799 ref_str,
800 config,
801 max_word_length,
802 ref_max_width,
803 );
804
805 match config.format {
806 OutputFormat::Plain => write_plain(output, ref_str, config, &layout, ref_max_width)?,
807 OutputFormat::Roff => {
808 write_roff(output, ref_str, config, &layout, &escaped_trunc)?;
809 }
810 OutputFormat::Tex => write_tex(output, ref_str, config, &layout)?,
811 }
812 }
813
814 Ok(())
815}
816
817pub fn generate_ptx<R: BufRead, W: Write>(
819 mut input: R,
820 output: &mut W,
821 config: &PtxConfig,
822) -> io::Result<()> {
823 let mut content = String::new();
824 input.read_to_string(&mut content)?;
825
826 let mut lines: Vec<(String, String)> = Vec::new();
827 let mut global_line_num = 0usize;
828 process_lines_into_contexts(&content, None, config, &mut lines, &mut global_line_num);
829
830 format_and_write(&lines, output, config)
831}
832
833pub fn generate_ptx_multi<W: Write>(
835 file_contents: &[(Option<String>, String)],
836 output: &mut W,
837 config: &PtxConfig,
838) -> io::Result<()> {
839 let mut lines: Vec<(String, String)> = Vec::new();
840 let mut global_line_num = 0usize;
841
842 for (filename, content) in file_contents {
843 process_lines_into_contexts(
844 content,
845 filename.as_deref(),
846 config,
847 &mut lines,
848 &mut global_line_num,
849 );
850 }
851
852 format_and_write(&lines, output, config)
853}
854
/// Read a word-list file (one word per line, surrounding whitespace
/// trimmed, blank lines skipped) into a set.
pub fn read_word_file(path: &str) -> io::Result<HashSet<String>> {
    let text = std::fs::read_to_string(path)?;
    let mut words = HashSet::new();
    for line in text.lines() {
        let word = line.trim();
        if !word.is_empty() {
            words.insert(word.to_string());
        }
    }
    Ok(words)
}