1use std::collections::VecDeque;
5use std::fs::File;
6use std::io::{self, BufRead, BufReader, Cursor, Seek, SeekFrom};
7use std::path::PathBuf;
8
9use rand::seq::SliceRandom;
10use rand::Rng;
11
12use bisection::bisect_right;
13use rand::prelude::ThreadRng;
14
15#[derive(Debug)]
60pub struct RawWordSelector<T> {
61 reader: BufReader<T>,
62 letter_pos: [u64; 26],
63 letter_lines_sum: [u64; 27],
64}
65
66impl<T: Seek + io::Read> RawWordSelector<T> {
67 pub fn new(mut reader: BufReader<T>) -> Result<Self, io::Error> {
72 let mut letter_pos = [0u64; 26];
73 let mut letter_lines = [0u64; 26];
74 let mut num_lines = 0;
75 let mut cur_letter = b'a' - 1;
76 let mut cur_pos = 0;
77 let mut buffer = String::new();
78
79 fn is_letter(char: u8) -> bool {
80 char.is_ascii_lowercase()
81 }
82
83 loop {
84 buffer.clear();
85 let len = reader.read_line(&mut buffer)?;
86
87 if len == 0 {
88 break;
89 }
90
91 let line = buffer.to_ascii_lowercase();
92 num_lines += 1;
93 let first_char = line.bytes().next().unwrap();
94
95 if !is_letter(first_char) {
96 continue;
97 }
98
99 if cur_letter != first_char {
100 letter_pos[cur_pos] = reader.stream_position()?;
101
102 letter_lines[cur_pos] = num_lines;
103 num_lines = 0;
104
105 cur_pos += 1;
114 cur_letter = first_char;
115
116 if cur_pos >= 26 {
117 break;
118 }
119 }
120 }
121
122 letter_lines.rotate_left(1);
123 letter_lines[25] = num_lines;
124 let letter_lines_sum: [u64; 26] = letter_lines
125 .into_iter()
126 .scan(0, |acc, x| {
127 *acc += x;
128
129 Some(*acc)
130 })
131 .collect::<Vec<u64>>()
132 .try_into()
133 .unwrap();
134 let mut letter_lines_sum_ = [0u64; 27];
135 letter_lines_sum_[1..].copy_from_slice(&letter_lines_sum[..]);
136 let letter_lines_sum = letter_lines_sum_;
137
138 let word_selector = Self {
142 reader,
143 letter_pos,
144 letter_lines_sum,
145 };
146
147 Ok(word_selector)
148 }
149
150 fn word_at_letter_offset(
151 &mut self,
152 letter_index: usize,
153 line_offset: u64,
154 ) -> Result<String, io::Error> {
155 self.reader
156 .seek(SeekFrom::Start(self.letter_pos[letter_index]))?;
157
158 let mut buffer = String::new();
159 let mut line_no = 0;
160
161 loop {
162 buffer.clear();
163 self.reader.read_line(&mut buffer)?;
164
165 if line_no == line_offset {
166 break;
167 }
168
169 line_no += 1
170 }
171
172 buffer.truncate(buffer.len() - 1);
174
175 Ok(buffer)
176 }
177
178 fn new_word_raw(&mut self, rng: &mut ThreadRng) -> Result<String, io::Error> {
179 let line_index = rng.gen_range(self.letter_lines_sum[0]..self.letter_lines_sum[26]);
180 let letter_lines_sum_index = bisect_right(&self.letter_lines_sum, &line_index);
183
184 let line_offset = self.letter_lines_sum[letter_lines_sum_index] - line_index;
185
186 let letter_index = letter_lines_sum_index - 1;
187
188 self.word_at_letter_offset(letter_index, line_offset)
197 }
198}
199
200impl RawWordSelector<File> {
201 pub fn from_path(word_list_path: PathBuf) -> Result<Self, io::Error> {
206 let file = File::open(word_list_path)?;
207
208 let reader = BufReader::new(file);
209
210 Self::new(reader)
211 }
212}
213
214impl RawWordSelector<Cursor<String>> {
215 pub fn from_string(word_list: String) -> Result<Self, io::Error> {
220 let cursor = Cursor::new(word_list);
221 let reader = BufReader::new(cursor);
222
223 RawWordSelector::new(reader)
224 }
225}
226
227pub trait WordSelector {
229 fn new_word(&mut self) -> Result<String, io::Error>;
231
232 fn new_words(&mut self, num_words: usize) -> Result<Vec<String>, io::Error> {
234 (0..num_words).map(|_| self.new_word()).collect()
235 }
236}
237
238impl<T: Seek + io::Read> WordSelector for RawWordSelector<T> {
239 fn new_word(&mut self) -> Result<String, io::Error> {
240 let mut rng = rand::thread_rng();
241
242 let mut word = "-".to_string();
243
244 while word.len() < 2 || word.len() > 8 || !word.chars().all(|c| c.is_ascii_alphabetic()) {
245 word = self.new_word_raw(&mut rng)?;
246 }
247
248 word.make_ascii_lowercase();
249
250 Ok(word)
251 }
252}
253
254pub struct PunctuatedWordSelector {
258 selector: Box<dyn WordSelector>,
259 next_is_capital: bool,
260 punctuation_chance: f64,
261}
262
263enum PunctuationType {
264 Capitaizing(char),
265 Ending(char),
266 Surrounding(char, char),
267}
268
269const PUNCTUATION: [PunctuationType; 12] = [
270 PunctuationType::Capitaizing('!'),
271 PunctuationType::Capitaizing('?'),
272 PunctuationType::Capitaizing('.'),
273 PunctuationType::Ending(','),
274 PunctuationType::Ending(':'),
275 PunctuationType::Ending(';'),
276 PunctuationType::Surrounding('\'', '\''),
277 PunctuationType::Surrounding('"', '"'),
278 PunctuationType::Surrounding('(', ')'),
279 PunctuationType::Surrounding('{', '}'),
280 PunctuationType::Surrounding('<', '>'),
281 PunctuationType::Surrounding('[', ']'),
282];
283
284impl PunctuatedWordSelector {
285 pub fn from_word_selector(
288 word_selector: Box<dyn WordSelector>,
289 punctuation_chance: f64,
290 ) -> Self {
291 Self {
292 selector: word_selector,
293 next_is_capital: true,
294 punctuation_chance,
295 }
296 }
297}
298
299impl WordSelector for PunctuatedWordSelector {
300 fn new_word(&mut self) -> Result<String, io::Error> {
301 let mut rng = rand::thread_rng();
302
303 let mut word = self.selector.new_word()?;
304
305 let will_punctuate = rng.gen_bool(self.punctuation_chance);
306 if will_punctuate || self.next_is_capital {
307 let mut chars: VecDeque<char> = word.chars().collect();
308 if self.next_is_capital {
309 for c in chars
311 .pop_front()
312 .expect("got empty word")
313 .to_uppercase()
314 .rev()
315 {
316 chars.push_front(c)
317 }
318 self.next_is_capital = false;
319 }
320 if will_punctuate {
321 match PUNCTUATION
322 .choose(&mut rng)
323 .expect("only returns none if the slice is empty")
324 {
325 PunctuationType::Capitaizing(c) => {
326 self.next_is_capital = true;
327 chars.push_back(*c)
328 }
329 PunctuationType::Ending(c) => chars.push_back(*c),
330 PunctuationType::Surrounding(opening, closing) => {
331 chars.push_front(*opening);
332 chars.push_back(*closing);
333 }
334 }
335 }
336 word = chars.into_iter().collect();
337 }
338 Ok(word)
339 }
340}