1use std::collections::HashSet;
2use std::io::{self, BufRead, Write};
3
/// Output layout used when rendering index entries.
///
/// The enum is fieldless, so it is `Copy`, totally comparable (`Eq`) and
/// hashable in addition to the original `Clone`/`Debug`/`PartialEq`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum OutputFormat {
    /// One `.xx` macro call per entry, with quoted arguments.
    Roff,
    /// One `\xx` macro call per entry, with brace-delimited arguments.
    Tex,
    /// Fixed-width plain-text columns.
    Plain,
}
14
15#[derive(Clone, Debug)]
17pub struct PtxConfig {
18 pub width: usize,
19 pub ignore_case: bool,
20 pub auto_reference: bool,
21 pub traditional: bool,
22 pub format: OutputFormat,
23 pub ignore_words: HashSet<String>,
24 pub only_words: Option<HashSet<String>>,
25 pub references: bool,
26 pub gap_size: usize,
27 pub right_reference: bool,
28 pub sentence_regexp: Option<String>,
29 pub word_regexp: Option<String>,
30}
31
32impl Default for PtxConfig {
33 fn default() -> Self {
34 Self {
35 width: 72,
36 ignore_case: false,
37 auto_reference: false,
38 traditional: false,
39 format: OutputFormat::Plain,
40 ignore_words: HashSet::new(),
41 only_words: None,
42 references: false,
43 gap_size: 3,
44 right_reference: false,
45 sentence_regexp: None,
46 word_regexp: None,
47 }
48 }
49}
50
/// One keyword-in-context record: a keyword together with the text around it.
#[derive(Debug, Clone)]
struct KwicEntry {
    /// Reference label associated with the source line (may be empty).
    reference: String,
    /// Text of the source line before the keyword, with trailing whitespace removed.
    left_context: String,
    /// The indexed word exactly as it appears in the source line.
    keyword: String,
    /// Text of the source line after the keyword, with leading whitespace removed.
    right_context: String,
    /// Key the entries are ordered by (the keyword, lowercased when case is ignored).
    sort_key: String,
}
65
/// Finds every word in `line` and returns it with its starting byte offset.
///
/// A word is a maximal run of characters that are alphanumeric (per
/// `char::is_alphanumeric`) or `_`. Words are returned in the order they
/// occur; each slice borrows from `line`.
fn extract_words(line: &str) -> Vec<(usize, &str)> {
    let is_word_char = |c: char| c.is_alphanumeric() || c == '_';

    let mut found = Vec::new();
    let mut run_start: Option<usize> = None;

    // A non-word sentinel positioned at `line.len()` closes a word that runs
    // up to the end of the line, so no separate post-loop flush is needed.
    let sentinel = std::iter::once((line.len(), ' '));

    for (offset, c) in line.char_indices().chain(sentinel) {
        match (run_start, is_word_char(c)) {
            // First character of a new word.
            (None, true) => run_start = Some(offset),
            // First character after a word: emit the finished run.
            (Some(begin), false) => {
                found.push((begin, &line[begin..offset]));
                run_start = None;
            }
            // Still inside a word, or still between words.
            _ => {}
        }
    }

    found
}
88
89fn should_index(word: &str, config: &PtxConfig) -> bool {
91 let check_word = if config.ignore_case {
92 word.to_lowercase()
93 } else {
94 word.to_string()
95 };
96
97 if let Some(ref only) = config.only_words {
99 if config.ignore_case {
100 return only.iter().any(|w| w.to_lowercase() == check_word);
101 }
102 return only.contains(&check_word);
103 }
104
105 if config.ignore_case {
107 !config
108 .ignore_words
109 .iter()
110 .any(|w| w.to_lowercase() == check_word)
111 } else {
112 !config.ignore_words.contains(&check_word)
113 }
114}
115
116fn generate_entries(lines: &[(String, String)], config: &PtxConfig) -> Vec<KwicEntry> {
118 let mut entries = Vec::new();
119
120 for (reference, line) in lines {
121 let words = extract_words(line);
122
123 for &(word_start, word) in &words {
124 if !should_index(word, config) {
125 continue;
126 }
127
128 let word_end = word_start + word.len();
129
130 let left = line[..word_start].trim_end();
132
133 let right = line[word_end..].trim_start();
135
136 let sort_key = if config.ignore_case {
137 word.to_lowercase()
138 } else {
139 word.to_string()
140 };
141
142 entries.push(KwicEntry {
143 reference: reference.clone(),
144 left_context: left.to_string(),
145 keyword: word.to_string(),
146 right_context: right.to_string(),
147 sort_key,
148 });
149 }
150 }
151
152 entries.sort_by(|a, b| {
154 a.sort_key
155 .cmp(&b.sort_key)
156 .then_with(|| a.reference.cmp(&b.reference))
157 });
158
159 entries
160}
161
/// Returns the tail of `s` that fits in at most `max_len` bytes.
///
/// Text is dropped from the front. If the cut would land inside a multi-byte
/// character, it moves forward to the next character boundary, so the result
/// can be shorter than `max_len` bytes.
fn truncate_left(s: &str, max_len: usize) -> &str {
    let total = s.len();
    if total <= max_len {
        return s;
    }

    // `total` itself is always a boundary, so the search cannot come up empty;
    // the fallback only keeps the expression total.
    let cut = (total - max_len..=total)
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(total);

    &s[cut..]
}
175
/// Returns the head of `s` that fits in at most `max_len` bytes.
///
/// Text is dropped from the end. If the cut would land inside a multi-byte
/// character, it moves back to the previous character boundary, so the result
/// can be shorter than `max_len` bytes.
fn truncate_right(s: &str, max_len: usize) -> &str {
    if s.len() <= max_len {
        return s;
    }

    // Offset 0 is always a boundary, so the search cannot come up empty;
    // the fallback only keeps the expression total.
    let cut = (0..=max_len)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0);

    &s[..cut]
}
187
188fn format_plain(entry: &KwicEntry, config: &PtxConfig) -> String {
190 let ref_str = if config.auto_reference || config.references {
191 &entry.reference
192 } else {
193 ""
194 };
195
196 let total_width = config.width;
197 let gap = config.gap_size;
198
199 let ref_width = if ref_str.is_empty() {
201 0
202 } else {
203 ref_str.len() + gap
204 };
205
206 let available = if total_width > ref_width {
207 total_width - ref_width
208 } else {
209 total_width
210 };
211
212 let right_half = available / 2;
215 let left_half = available - right_half;
216
217 let left = truncate_left(
219 &entry.left_context,
220 if left_half > gap { left_half - gap } else { 0 },
221 );
222
223 let right_text = if entry.right_context.is_empty() {
225 entry.keyword.clone()
226 } else {
227 format!("{} {}", entry.keyword, entry.right_context)
228 };
229 let right = truncate_right(&right_text, right_half);
230
231 if ref_str.is_empty() {
232 format!(
233 "{:>left_w$}{}{}",
234 left,
235 " ".repeat(gap),
236 right,
237 left_w = left_half - gap
238 )
239 } else if config.right_reference {
240 format!(
241 "{:>left_w$}{}{}{}{}",
242 left,
243 " ".repeat(gap),
244 right,
245 " ".repeat(gap),
246 ref_str,
247 left_w = left_half - gap,
248 )
249 } else {
250 format!(
251 "{}{}{:>left_w$}{}{}",
252 ref_str,
253 " ".repeat(gap),
254 left,
255 " ".repeat(gap),
256 right,
257 left_w = left_half - gap,
258 )
259 }
260}
261
262fn format_roff(entry: &KwicEntry, config: &PtxConfig) -> String {
264 let ref_str = if config.auto_reference || config.references {
265 &entry.reference
266 } else {
267 ""
268 };
269
270 let left = entry
272 .left_context
273 .replace('\\', "\\\\")
274 .replace('"', "\\\"");
275 let keyword = entry.keyword.replace('\\', "\\\\").replace('"', "\\\"");
276 let right = entry
277 .right_context
278 .replace('\\', "\\\\")
279 .replace('"', "\\\"");
280 let reference = ref_str.replace('\\', "\\\\").replace('"', "\\\"");
281
282 format!(
283 ".xx \"{}\" \"{}\" \"{}\" \"{}\"",
284 left, keyword, right, reference
285 )
286}
287
288fn format_tex(entry: &KwicEntry, config: &PtxConfig) -> String {
290 let ref_str = if config.auto_reference || config.references {
291 &entry.reference
292 } else {
293 ""
294 };
295
296 fn escape_tex(s: &str) -> String {
298 let mut result = String::with_capacity(s.len());
299 for ch in s.chars() {
300 match ch {
301 '\\' => result.push_str("\\backslash "),
302 '{' => result.push_str("\\{"),
303 '}' => result.push_str("\\}"),
304 '$' => result.push_str("\\$"),
305 '&' => result.push_str("\\&"),
306 '#' => result.push_str("\\#"),
307 '_' => result.push_str("\\_"),
308 '^' => result.push_str("\\^{}"),
309 '~' => result.push_str("\\~{}"),
310 '%' => result.push_str("\\%"),
311 _ => result.push(ch),
312 }
313 }
314 result
315 }
316
317 format!(
318 "\\xx {{{}}}{{{}}}{{{}}}{{{}}}",
319 escape_tex(&entry.left_context),
320 escape_tex(&entry.keyword),
321 escape_tex(&entry.right_context),
322 escape_tex(ref_str),
323 )
324}
325
326pub fn generate_ptx<R: BufRead, W: Write>(
331 input: R,
332 output: &mut W,
333 config: &PtxConfig,
334) -> io::Result<()> {
335 let mut lines: Vec<(String, String)> = Vec::new();
337 let mut line_num = 0usize;
338
339 for line_result in input.lines() {
340 let line = line_result?;
341 line_num += 1;
342
343 let reference = if config.auto_reference {
344 format!("{}", line_num)
345 } else {
346 String::new()
347 };
348
349 lines.push((reference, line));
350 }
351
352 let entries = generate_entries(&lines, config);
354
355 for entry in &entries {
357 let formatted = match config.format {
358 OutputFormat::Plain => format_plain(entry, config),
359 OutputFormat::Roff => format_roff(entry, config),
360 OutputFormat::Tex => format_tex(entry, config),
361 };
362 writeln!(output, "{}", formatted)?;
363 }
364
365 Ok(())
366}
367
/// Loads a word list from the file at `path`, one word per line.
///
/// Each line is trimmed of surrounding whitespace; lines that are empty after
/// trimming are skipped. Duplicate words collapse into a single set entry.
///
/// # Errors
///
/// Returns the error from `std::fs::read_to_string` if the file cannot be
/// read.
pub fn read_word_file(path: &str) -> io::Result<HashSet<String>> {
    let text = std::fs::read_to_string(path)?;

    let mut words = HashSet::new();
    for raw_line in text.lines() {
        let word = raw_line.trim();
        if !word.is_empty() {
            words.insert(word.to_string());
        }
    }

    Ok(words)
}