1use std::collections::HashSet;
2use std::io::{self, BufRead, Write};
3
/// Output dialect used when rendering permuted-index entries.
///
/// The enum is a payload-free tag, so besides `Clone`/`PartialEq` it is also
/// `Copy` and `Eq`: it can be passed by value and compared without cloning.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OutputFormat {
    /// Render each entry as a roff macro call.
    Roff,
    /// Render each entry as a TeX macro call.
    Tex,
    /// Render each entry as a fixed-width plain-text line.
    Plain,
}
14
15#[derive(Clone, Debug)]
17pub struct PtxConfig {
18 pub width: usize,
19 pub ignore_case: bool,
20 pub auto_reference: bool,
21 pub traditional: bool,
22 pub format: OutputFormat,
23 pub ignore_words: HashSet<String>,
24 pub only_words: Option<HashSet<String>>,
25 pub references: bool,
26 pub gap_size: usize,
27 pub right_reference: bool,
28 pub sentence_regexp: Option<String>,
29 pub word_regexp: Option<String>,
30 pub flag_truncation: Option<String>,
31 pub macro_name: Option<String>,
32}
33
34impl Default for PtxConfig {
35 fn default() -> Self {
36 Self {
37 width: 72,
38 ignore_case: false,
39 auto_reference: false,
40 traditional: false,
41 format: OutputFormat::Plain,
42 ignore_words: HashSet::new(),
43 only_words: None,
44 references: false,
45 gap_size: 3,
46 right_reference: false,
47 sentence_regexp: None,
48 word_regexp: None,
49 flag_truncation: None,
50 macro_name: None,
51 }
52 }
53}
54
/// One keyword occurrence inside a context, ready to be sorted and laid out.
#[derive(Debug, Clone)]
struct KwicEntry {
    /// Reference label of the context the keyword came from (may be empty).
    reference: String,
    /// Complete text of the context containing the keyword.
    full_line: String,
    /// Byte offset of the keyword within `full_line`.
    word_start: usize,
    /// The keyword exactly as it appears in the text.
    keyword: String,
    /// Key the entries are ordered by (the lowercased keyword when case is
    /// ignored, otherwise the keyword itself).
    sort_key: String,
}
69
/// The pieces of one output line, already cut to fit their columns.
struct LayoutFields {
    /// Text following `keyafter` that wraps around to the left edge.
    tail: String,
    /// Context immediately to the left of the keyword.
    before: String,
    /// The keyword followed by the context to its right.
    keyafter: String,
    /// The keyword alone.
    keyword: String,
    /// The part of `keyafter` that follows the keyword.
    after: String,
    /// Text preceding `before` that wraps around to the right edge.
    head: String,
    /// `tail` stops before the end of the context.
    tail_truncated: bool,
    /// `before` does not reach back to the start of the context.
    before_truncated: bool,
    /// `keyafter` stops before the end of the context.
    keyafter_truncated: bool,
    /// `head` does not reach back to the start of the context.
    head_truncated: bool,
}
91
/// Finds every word in `line` and returns it with its starting byte offset.
///
/// A word is an ASCII letter followed by any run of ASCII letters or digits;
/// every other byte acts as a separator.
fn extract_words(line: &str) -> Vec<(usize, &str)> {
    let bytes = line.as_bytes();
    let mut found = Vec::new();
    let mut pos = 0;

    // Jump from one word-starting letter to the next.
    while let Some(offset) = bytes[pos..].iter().position(u8::is_ascii_alphabetic) {
        let start = pos + offset;
        let rest_len = bytes[start + 1..]
            .iter()
            .take_while(|b| b.is_ascii_alphanumeric())
            .count();
        let end = start + 1 + rest_len;
        found.push((start, &line[start..end]));
        pos = end;
    }

    found
}
121
122fn should_index(word: &str, config: &PtxConfig) -> bool {
124 let check_word = if config.ignore_case {
125 word.to_lowercase()
126 } else {
127 word.to_string()
128 };
129
130 if let Some(ref only) = config.only_words {
132 if config.ignore_case {
133 return only.iter().any(|w| w.to_lowercase() == check_word);
134 }
135 return only.contains(&check_word);
136 }
137
138 if config.ignore_case {
140 !config
141 .ignore_words
142 .iter()
143 .any(|w| w.to_lowercase() == check_word)
144 } else {
145 !config.ignore_words.contains(&check_word)
146 }
147}
148
149fn generate_entries(lines: &[(String, String)], config: &PtxConfig) -> Vec<KwicEntry> {
151 let mut entries = Vec::new();
152
153 for (reference, line) in lines {
154 let words = extract_words(line);
155
156 for &(word_start, word) in &words {
157 if !should_index(word, config) {
158 continue;
159 }
160
161 let sort_key = if config.ignore_case {
162 word.to_lowercase()
163 } else {
164 word.to_string()
165 };
166
167 entries.push(KwicEntry {
168 reference: reference.clone(),
169 full_line: line.clone(),
170 word_start,
171 keyword: word.to_string(),
172 sort_key,
173 });
174 }
175 }
176
177 entries.sort_by(|a, b| {
179 a.sort_key
180 .cmp(&b.sort_key)
181 .then_with(|| a.reference.cmp(&b.reference))
182 });
183
184 entries
185}
186
/// Returns the byte offset just past the token that starts at `pos`.
///
/// * If `pos` is at or beyond the end of `s`, `pos` is returned unchanged.
/// * If the byte at `pos` is an ASCII letter, the whole word (that letter plus
///   any following ASCII letters/digits) is skipped.
/// * Otherwise exactly one character is skipped.
///
/// The returned offset is always a UTF-8 character boundary, even when `pos`
/// itself points into the middle of a multi-byte character. Callers slice the
/// string at these offsets, so stopping inside a character would panic.
fn skip_something(s: &str, pos: usize) -> usize {
    let bytes = s.as_bytes();
    let first = match bytes.get(pos) {
        Some(&b) => b,
        None => return pos,
    };

    let mut next = pos + 1;
    if first.is_ascii_alphabetic() {
        while next < bytes.len() && bytes[next].is_ascii_alphanumeric() {
            next += 1;
        }
    } else {
        // Step over the continuation bytes of a multi-byte character;
        // `is_char_boundary` is true at `s.len()`, so this terminates.
        while !s.is_char_boundary(next) {
            next += 1;
        }
    }
    next
}
209
/// Returns the offset of the first byte at or after `pos` that is not ASCII
/// whitespace (or the end of `s`). A `pos` beyond the end is returned as is.
fn skip_white(s: &str, pos: usize) -> usize {
    match s.as_bytes().get(pos..) {
        Some(rest) => {
            let blanks = rest.iter().take_while(|b| b.is_ascii_whitespace()).count();
            pos + blanks
        }
        None => pos,
    }
}
219
/// Moves `pos` left over ASCII whitespace, never going below `start`, and
/// returns the resulting offset.
fn skip_white_backwards(s: &str, pos: usize, start: usize) -> usize {
    if pos <= start {
        return pos;
    }
    let trailing_blanks = s.as_bytes()[start..pos]
        .iter()
        .rev()
        .take_while(|b| b.is_ascii_whitespace())
        .count();
    pos - trailing_blanks
}
229
230fn compute_layout(
235 entry: &KwicEntry,
236 config: &PtxConfig,
237 max_word_length: usize,
238 ref_max_width: usize,
239) -> LayoutFields {
240 let ref_str = if config.auto_reference || config.references {
241 &entry.reference
242 } else {
243 ""
244 };
245
246 let total_width = config.width;
247 let gap = config.gap_size;
248 let trunc_len = 1; let ref_width = if ref_str.is_empty() || config.right_reference {
252 0
253 } else {
254 ref_max_width + gap
255 };
256
257 let line_width = if total_width > ref_width {
258 total_width - ref_width
259 } else {
260 total_width
261 };
262
263 let half_line_width = line_width / 2;
264
265 let before_max_width = if half_line_width > gap + 2 * trunc_len {
268 half_line_width - gap - 2 * trunc_len
269 } else {
270 0
271 };
272 let keyafter_max_width = if half_line_width > 2 * trunc_len {
273 half_line_width - 2 * trunc_len
274 } else {
275 0
276 };
277
278 let sentence = &entry.full_line;
279 let word_start = entry.word_start;
280 let keyword_len = entry.keyword.len();
281 let line_len = sentence.len();
282
283 let keyafter_start = word_start;
285 let mut keyafter_end = word_start + keyword_len;
286 {
287 let mut cursor = keyafter_end;
288 while cursor < line_len && cursor <= keyafter_start + keyafter_max_width {
289 keyafter_end = cursor;
290 cursor = skip_something(sentence, cursor);
291 }
292 if cursor <= keyafter_start + keyafter_max_width {
293 keyafter_end = cursor;
294 }
295 }
296 let mut keyafter_truncation = keyafter_end < line_len;
297 keyafter_end = skip_white_backwards(sentence, keyafter_end, keyafter_start);
298
299 let left_context_start: usize = 0;
301 let left_field_start = if word_start > half_line_width + max_word_length {
302 let mut lfs = word_start - (half_line_width + max_word_length);
303 lfs = skip_something(sentence, lfs);
304 lfs
305 } else {
306 left_context_start
307 };
308
309 let mut before_start: usize = left_field_start;
311 let mut before_end = keyafter_start;
312 before_end = skip_white_backwards(sentence, before_end, before_start);
313
314 while before_start + before_max_width < before_end {
315 before_start = skip_something(sentence, before_start);
316 }
317
318 let mut before_truncation = {
319 let cursor = skip_white_backwards(sentence, before_start, 0);
320 cursor > left_context_start
321 };
322
323 before_start = skip_white(sentence, before_start);
324 let before_len = if before_end > before_start {
325 before_end - before_start
326 } else {
327 0
328 };
329
330 let tail_max_width_raw: isize = before_max_width as isize - before_len as isize - gap as isize;
332 let mut tail_start: usize = 0;
333 let mut tail_end: usize = 0;
334 let mut tail_truncation = false;
335 let mut has_tail = false;
336
337 if tail_max_width_raw > 0 {
338 let tail_max_width = tail_max_width_raw as usize;
339 tail_start = skip_white(sentence, keyafter_end);
340 tail_end = tail_start;
341 let mut cursor = tail_end;
342 while cursor < line_len && cursor < tail_start + tail_max_width {
343 tail_end = cursor;
344 cursor = skip_something(sentence, cursor);
345 }
346 if cursor < tail_start + tail_max_width {
347 tail_end = cursor;
348 }
349
350 if tail_end > tail_start {
351 has_tail = true;
352 keyafter_truncation = false;
353 tail_truncation = tail_end < line_len;
354 } else {
355 tail_truncation = false;
356 }
357
358 tail_end = skip_white_backwards(sentence, tail_end, tail_start);
359 }
360
361 let keyafter_len = if keyafter_end > keyafter_start {
363 keyafter_end - keyafter_start
364 } else {
365 0
366 };
367 let head_max_width_raw: isize =
368 keyafter_max_width as isize - keyafter_len as isize - gap as isize;
369 let mut head_start: usize = 0;
370 let mut head_end: usize = 0;
371 let mut head_truncation = false;
372 let mut has_head = false;
373
374 if head_max_width_raw > 0 {
375 let head_max_width = head_max_width_raw as usize;
376 head_end = skip_white_backwards(sentence, before_start, 0);
377
378 head_start = left_field_start;
379 while head_start + head_max_width < head_end {
380 head_start = skip_something(sentence, head_start);
381 }
382
383 if head_end > head_start {
384 has_head = true;
385 before_truncation = false;
386 head_truncation = {
387 let cursor = skip_white_backwards(sentence, head_start, 0);
388 cursor > left_context_start
389 };
390 } else {
391 head_truncation = false;
392 }
393
394 if head_end > head_start {
395 head_start = skip_white(sentence, head_start);
396 }
397 }
398
399 let before_text = if before_len > 0 {
401 &sentence[before_start..before_end]
402 } else {
403 ""
404 };
405 let keyafter_text = if keyafter_end > keyafter_start {
406 &sentence[keyafter_start..keyafter_end]
407 } else {
408 ""
409 };
410 let tail_text = if has_tail && tail_end > tail_start {
411 &sentence[tail_start..tail_end]
412 } else {
413 ""
414 };
415 let head_text = if has_head && head_end > head_start {
416 &sentence[head_start..head_end]
417 } else {
418 ""
419 };
420
421 let keyword_text = &entry.keyword;
423 let after_start = keyafter_start + keyword_len;
424 let after_text = if keyafter_end > after_start {
425 &sentence[after_start..keyafter_end]
426 } else {
427 ""
428 };
429
430 LayoutFields {
431 tail: tail_text.to_string(),
432 before: before_text.to_string(),
433 keyafter: keyafter_text.to_string(),
434 keyword: keyword_text.to_string(),
435 after: after_text.to_string(),
436 head: head_text.to_string(),
437 tail_truncated: tail_truncation,
438 before_truncated: before_truncation,
439 keyafter_truncated: keyafter_truncation,
440 head_truncated: head_truncation,
441 }
442}
443
444fn format_plain(
446 entry: &KwicEntry,
447 config: &PtxConfig,
448 layout: &LayoutFields,
449 ref_max_width: usize,
450) -> String {
451 let ref_str = if config.auto_reference || config.references {
452 &entry.reference
453 } else {
454 ""
455 };
456
457 let total_width = config.width;
458 let gap = config.gap_size;
459 let trunc_str = config.flag_truncation.as_deref().unwrap_or("/");
460 let trunc_len = trunc_str.len();
461
462 let ref_width = if ref_str.is_empty() || config.right_reference {
463 0
464 } else {
465 ref_max_width + gap
466 };
467
468 let line_width = if total_width > ref_width {
469 total_width - ref_width
470 } else {
471 total_width
472 };
473
474 let half_line_width = line_width / 2;
475
476 let before_trunc_len = if layout.before_truncated {
477 trunc_len
478 } else {
479 0
480 };
481 let keyafter_trunc_len = if layout.keyafter_truncated {
482 trunc_len
483 } else {
484 0
485 };
486 let tail_trunc_len = if layout.tail_truncated { trunc_len } else { 0 };
487 let head_trunc_len = if layout.head_truncated { trunc_len } else { 0 };
488
489 let mut result = String::with_capacity(total_width + 10);
490
491 if !config.right_reference {
493 if !ref_str.is_empty() && config.auto_reference {
494 result.push_str(ref_str);
495 result.push(':');
496 let ref_total = ref_str.len() + 1;
497 let ref_pad_total = ref_max_width + gap;
498 let padding = ref_pad_total.saturating_sub(ref_total);
499 for _ in 0..padding {
500 result.push(' ');
501 }
502 } else if !ref_str.is_empty() {
503 result.push_str(ref_str);
504 let ref_pad_total = ref_max_width + gap;
505 let padding = ref_pad_total.saturating_sub(ref_str.len());
506 for _ in 0..padding {
507 result.push(' ');
508 }
509 } else {
510 for _ in 0..gap {
511 result.push(' ');
512 }
513 }
514 }
515
516 if !layout.tail.is_empty() {
518 result.push_str(&layout.tail);
519 if layout.tail_truncated {
520 result.push_str(trunc_str);
521 }
522 let tail_used = layout.tail.len() + tail_trunc_len;
523 let before_used = layout.before.len() + before_trunc_len;
524 let padding = half_line_width
525 .saturating_sub(gap)
526 .saturating_sub(tail_used)
527 .saturating_sub(before_used);
528 for _ in 0..padding {
529 result.push(' ');
530 }
531 } else {
532 let before_used = layout.before.len() + before_trunc_len;
533 let padding = half_line_width
534 .saturating_sub(gap)
535 .saturating_sub(before_used);
536 for _ in 0..padding {
537 result.push(' ');
538 }
539 }
540
541 if layout.before_truncated {
542 result.push_str(trunc_str);
543 }
544 result.push_str(&layout.before);
545
546 for _ in 0..gap {
548 result.push(' ');
549 }
550
551 result.push_str(&layout.keyafter);
553 if layout.keyafter_truncated {
554 result.push_str(trunc_str);
555 }
556
557 if !layout.head.is_empty() {
558 let keyafter_used = layout.keyafter.len() + keyafter_trunc_len;
559 let head_used = layout.head.len() + head_trunc_len;
560 let padding = half_line_width
561 .saturating_sub(keyafter_used)
562 .saturating_sub(head_used);
563 for _ in 0..padding {
564 result.push(' ');
565 }
566 if layout.head_truncated {
567 result.push_str(trunc_str);
568 }
569 result.push_str(&layout.head);
570 } else if !ref_str.is_empty() && config.right_reference {
571 let keyafter_used = layout.keyafter.len() + keyafter_trunc_len;
572 let padding = half_line_width.saturating_sub(keyafter_used);
573 for _ in 0..padding {
574 result.push(' ');
575 }
576 }
577
578 if !ref_str.is_empty() && config.right_reference {
580 for _ in 0..gap {
581 result.push(' ');
582 }
583 result.push_str(ref_str);
584 }
585
586 result
587}
588
/// Escapes `s` for use inside a double-quoted roff macro argument.
///
/// Backslashes are doubled. A double quote is written as two double quotes,
/// which is how roff represents a literal quote inside a quoted argument.
/// It must not be written as `\"`: that sequence starts a roff comment and
/// would discard the rest of the macro line.
fn escape_roff(s: &str) -> String {
    s.replace('\\', "\\\\").replace('"', "\"\"")
}
593
594fn format_roff(entry: &KwicEntry, config: &PtxConfig, layout: &LayoutFields) -> String {
599 let ref_str = if config.auto_reference || config.references {
600 &entry.reference
601 } else {
602 ""
603 };
604
605 let trunc_flag = config.flag_truncation.as_deref().unwrap_or("/");
606
607 let macro_name = config.macro_name.as_deref().unwrap_or("xx");
608
609 let tail = if layout.tail_truncated {
611 format!("{}{}", layout.tail, trunc_flag)
612 } else {
613 layout.tail.clone()
614 };
615
616 let before = if layout.before_truncated {
617 format!("{}{}", trunc_flag, layout.before)
618 } else {
619 layout.before.clone()
620 };
621
622 let keyafter = if layout.keyafter_truncated {
623 format!("{}{}", layout.keyafter, trunc_flag)
624 } else {
625 layout.keyafter.clone()
626 };
627
628 let head = if layout.head_truncated {
629 format!("{}{}", trunc_flag, layout.head)
630 } else {
631 layout.head.clone()
632 };
633
634 if ref_str.is_empty() {
635 format!(
636 ".{} \"{}\" \"{}\" \"{}\" \"{}\"",
637 macro_name,
638 escape_roff(&tail),
639 escape_roff(&before),
640 escape_roff(&keyafter),
641 escape_roff(&head),
642 )
643 } else {
644 format!(
645 ".{} \"{}\" \"{}\" \"{}\" \"{}\" \"{}\"",
646 macro_name,
647 escape_roff(&tail),
648 escape_roff(&before),
649 escape_roff(&keyafter),
650 escape_roff(&head),
651 escape_roff(ref_str),
652 )
653 }
654}
655
/// Escapes the characters that are special to TeX so `s` can be placed inside
/// a macro argument. Characters without a special meaning are copied as is.
fn escape_tex(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\backslash "),
                '{' => Some("\\{"),
                '}' => Some("\\}"),
                '$' => Some("\\$"),
                '&' => Some("\\&"),
                '#' => Some("\\#"),
                '_' => Some("\\_"),
                '^' => Some("\\^{}"),
                '~' => Some("\\~{}"),
                '%' => Some("\\%"),
                _ => None,
            };
            match replacement {
                Some(text) => escaped.push_str(text),
                None => escaped.push(ch),
            }
            escaped
        })
}
676
677fn format_tex(entry: &KwicEntry, config: &PtxConfig, layout: &LayoutFields) -> String {
682 let ref_str = if config.auto_reference || config.references {
683 &entry.reference
684 } else {
685 ""
686 };
687
688 let macro_name = config.macro_name.as_deref().unwrap_or("xx");
689
690 if ref_str.is_empty() {
691 format!(
692 "\\{} {{{}}}{{{}}}{{{}}}{{{}}}{{{}}}",
693 macro_name,
694 escape_tex(&layout.tail),
695 escape_tex(&layout.before),
696 escape_tex(&layout.keyword),
697 escape_tex(&layout.after),
698 escape_tex(&layout.head),
699 )
700 } else {
701 format!(
702 "\\{} {{{}}}{{{}}}{{{}}}{{{}}}{{{}}}{{{}}}",
703 macro_name,
704 escape_tex(&layout.tail),
705 escape_tex(&layout.before),
706 escape_tex(&layout.keyword),
707 escape_tex(&layout.after),
708 escape_tex(&layout.head),
709 escape_tex(ref_str),
710 )
711 }
712}
713
714fn process_lines_into_contexts(
720 content: &str,
721 filename: Option<&str>,
722 config: &PtxConfig,
723 lines_out: &mut Vec<(String, String)>,
724 global_line_num: &mut usize,
725) {
726 let mut current_text = String::new();
727 let mut context_ref = String::new();
728 let mut first_line_of_context = true;
729
730 for line in content.lines() {
731 *global_line_num += 1;
732
733 let reference = if config.auto_reference {
734 match filename {
735 Some(name) => format!("{}:{}", name, global_line_num),
736 None => format!("{}", global_line_num),
737 }
738 } else {
739 String::new()
740 };
741
742 if first_line_of_context {
743 context_ref = reference;
744 first_line_of_context = false;
745 }
746
747 if !current_text.is_empty() {
748 current_text.push(' ');
749 }
750 current_text.push_str(line);
751
752 let trimmed = line.trim_end();
754 let ends_with_terminator =
755 trimmed.ends_with('.') || trimmed.ends_with('?') || trimmed.ends_with('!');
756
757 if ends_with_terminator || line.is_empty() {
758 if !current_text.trim().is_empty() {
759 lines_out.push((context_ref.clone(), current_text.clone()));
760 }
761 current_text.clear();
762 first_line_of_context = true;
763 }
764 }
765
766 if !current_text.trim().is_empty() {
768 lines_out.push((context_ref.clone(), current_text.clone()));
769 }
770}
771
772fn format_and_write<W: Write>(
773 lines: &[(String, String)],
774 output: &mut W,
775 config: &PtxConfig,
776) -> io::Result<()> {
777 let entries = generate_entries(lines, config);
779
780 let max_word_length = lines
782 .iter()
783 .flat_map(|(_, line)| extract_words(line))
784 .map(|(_, word)| word.len())
785 .max()
786 .unwrap_or(0);
787
788 let ref_max_width = entries.iter().map(|e| e.reference.len()).max().unwrap_or(0);
792
793 for entry in &entries {
795 let layout = compute_layout(entry, config, max_word_length, ref_max_width);
796 let formatted = match config.format {
797 OutputFormat::Plain => format_plain(entry, config, &layout, ref_max_width),
798 OutputFormat::Roff => format_roff(entry, config, &layout),
799 OutputFormat::Tex => format_tex(entry, config, &layout),
800 };
801 writeln!(output, "{}", formatted)?;
802 }
803
804 Ok(())
805}
806
807pub fn generate_ptx<R: BufRead, W: Write>(
812 input: R,
813 output: &mut W,
814 config: &PtxConfig,
815) -> io::Result<()> {
816 let mut content = String::new();
817 for line_result in input.lines() {
818 let line = line_result?;
819 content.push_str(&line);
820 content.push('\n');
821 }
822
823 let mut lines: Vec<(String, String)> = Vec::new();
824 let mut global_line_num = 0usize;
825 process_lines_into_contexts(&content, None, config, &mut lines, &mut global_line_num);
826
827 format_and_write(&lines, output, config)
828}
829
830pub fn generate_ptx_multi<W: Write>(
836 file_contents: &[(Option<String>, String)],
837 output: &mut W,
838 config: &PtxConfig,
839) -> io::Result<()> {
840 let mut lines: Vec<(String, String)> = Vec::new();
841 let mut global_line_num = 0usize;
842
843 for (filename, content) in file_contents {
844 process_lines_into_contexts(
845 content,
846 filename.as_deref(),
847 config,
848 &mut lines,
849 &mut global_line_num,
850 );
851 }
852
853 format_and_write(&lines, output, config)
854}
855
/// Loads a word list from the file at `path`: one word per line, surrounding
/// whitespace removed, blank lines ignored.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be read as UTF-8 text.
pub fn read_word_file(path: &str) -> io::Result<HashSet<String>> {
    let text = std::fs::read_to_string(path)?;

    let mut words = HashSet::new();
    for raw in text.lines() {
        let word = raw.trim();
        if !word.is_empty() {
            words.insert(word.to_string());
        }
    }
    Ok(words)
}