bio_read/lib.rs
1//! # Bio Read Library
2//!
3//! The `bio-read` library is an open-source implementation of the Bionic Reading method. Taking inspiration from [text-vide](https://github.com/Gumball12/text-vide/blob/main/HOW.md), this library ports the Bionic Reading method to Rust and provides a CLI for bio-reading text files right from the terminal.
4
5use anstyle::Style;
6use std::{
7 collections::VecDeque,
8 io::{Read, Write},
9};
10
/// A `BioReader` holds the configuration used to bio-read text: the markers
/// wrapped around the emphasized and de-emphasized parts of each word, and the
/// lookup table that decides where a word is split.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BioReader {
    /// The strings written before (`[0]`) and after (`[1]`) the emphasized part of a word.
    emphasize: [String; 2],
    /// The strings written before (`[0]`) and after (`[1]`) the de-emphasized part of a word.
    de_emphasize: [String; 2],
    /// Reverse map of fixation boundaries for quick lookup, indexed by word length.
    ///
    /// A word of exactly `i` letters is emphasized except for its last
    /// `reverse_fixation_boundaries[i]` letters. A word with
    /// `reverse_fixation_boundaries.len()` or more letters uses
    /// `reverse_fixation_boundaries.last().unwrap() + 1` instead (one more than the last entry).
    reverse_fixation_boundaries: Vec<usize>,
}
20
21impl BioReader {
22 /// Create a new BioReader object.
23 pub fn new() -> Self {
24 let bold = Style::new().bold();
25 let dim = Style::new().dimmed();
26 Self {
27 emphasize: [format!("{bold}"), format!("{bold:#}")],
28 de_emphasize: [format!("{dim}"), format!("{dim:#}")],
29 reverse_fixation_boundaries: Self::reverse_fixation_boundaries(3),
30 }
31 }
32
33 /// Set the strings to be wrapped around the emphasized part of a word. Default to bold if environment supports it.
34 ///
35 /// # Example
36 ///
37 /// ```rust
38 /// use bio_read::BioReader;
39 /// let reader = BioReader::new()
40 /// .emphasize(String::from("<em>"), String::from("</em>"))
41 /// .de_emphasize(String::from(""), String::from(""));
42 /// assert_eq!(reader.bio_read_text("hello world").unwrap(), "<em>hel</em>lo <em>wor</em>ld");
43 /// ```
44 ///
45 /// # See also
46 ///
47 /// Other methods that can be used to customize the [`BioReader`]:
48 ///
49 /// - [`BioReader::de_emphasize`]
50 /// - [`BioReader::fixation_point`]
51 pub fn emphasize(mut self, left: String, right: String) -> Self {
52 self.emphasize = [left, right];
53 self
54 }
55 /// Set the strings to be wrapped around the de-emphasized part of a word. Default to dimmed if environment supports it.
56 ///
57 /// # Example
58 ///
59 /// ```rust
60 /// use bio_read::BioReader;
61 /// let reader = BioReader::new()
62 /// .emphasize(String::from(""), String::from(""))
63 /// .de_emphasize(String::from("<de>"), String::from("</de>"));
64 /// assert_eq!(reader.bio_read_text("hello world").unwrap(), "hel<de>lo</de> wor<de>ld</de>");
65 /// ```
66 ///
67 /// # See also
68 ///
69 /// Other methods that can be used to customize the [`BioReader`]:
70 ///
71 /// - [`BioReader::emphasize`]
72 /// - [`BioReader::fixation_point`]
73 pub fn de_emphasize(mut self, left: String, right: String) -> Self {
74 self.de_emphasize = [left, right];
75 self
76 }
77 /// Set the fixation point. The lower the fixation point, the more characters will be emphasized. The `fixation_point` should be in range \[1, 5\], defaulting to 3 when not specified.
78 ///
79 /// # Example
80 ///
81 /// ```rust
82 /// use bio_read::BioReader;
83 /// let markdownBold = String::from("**");
84 /// let empty = String::from("");
85 /// let reader = BioReader::new()
86 /// .emphasize(markdownBold.clone(), markdownBold.clone())
87 /// .de_emphasize(empty.clone(), empty.clone())
88 /// .fixation_point(1); // Set fixation point to 1
89 /// assert_eq!(reader.bio_read_text("pneumonoultramicroscopicsilicovolcanoconiosis").unwrap(), "**pneumonoultramicroscopicsilicovolcano**coniosis");
90 /// let reader = BioReader::new()
91 /// .emphasize(markdownBold.clone(), markdownBold.clone())
92 /// .de_emphasize(empty.clone(), empty.clone())
93 /// .fixation_point(5); // Set fixation point to 5
94 /// assert_eq!(reader.bio_read_text("pneumonoultramicroscopicsilicovolcanoconiosis").unwrap(), "**pneumonoult**ramicroscopicsilicovolcanoconiosis");
95 /// ```
96 ///
97 /// # Panics
98 ///
99 /// Panics if `fixation_point` is not in range \[1, 5\].
100 ///
101 /// # See also
102 ///
103 /// Other methods that can be used to customize the [`BioReader`]:
104 ///
105 /// - [`BioReader::emphasize`]
106 /// - [`BioReader::de_emphasize`]
107 pub fn fixation_point(mut self, fixation_point: usize) -> Self {
108 assert!(
109 1 <= fixation_point && fixation_point <= 5,
110 "Fixation point should be in range [1, 5], but got {}",
111 fixation_point
112 );
113 self.reverse_fixation_boundaries = Self::reverse_fixation_boundaries(fixation_point);
114 self
115 }
116
117 /// Do bio-reading on `reader` and write the result to `writer`.
118 ///
119 /// # Performance
120 ///
121 /// This method guarantees linear time complexity and constant memory usage.
122 ///
123 /// # Example
124 ///
125 /// ```rust
126 /// use bio_read::BioReader;
127 /// use std::io::Write;
128 /// let reader = BioReader::new()
129 /// .emphasize(String::from("<em>"), String::from("</em>"))
130 /// .de_emphasize(String::from("<de>"), String::from("</de>"));
131 /// let mut output_buffer = Vec::new();
132 /// reader.bio_read("hello world".as_bytes(), &mut output_buffer).unwrap();
133 /// let output = String::from_utf8(output_buffer).unwrap();
134 /// assert_eq!(output, "<em>hel</em><de>lo</de> <em>wor</em><de>ld</de>");
135 /// ```
136 ///
137 /// # See also
138 ///
139 /// [`BioReader::bio_read_text`]: A simple wrapper around [`BioReader::bio_read`] for processing short strings.
140 pub fn bio_read(&self, reader: impl Read, writer: &mut impl Write) -> std::io::Result<()> {
141 let mut state = State {
142 read: 0,
143 written: 0,
144 };
145 // The buffer size is at most `self.reverse_fixation_boundaries.last().unwrap()`
146 let rev_boundaries = &self.reverse_fixation_boundaries;
147 let last = rev_boundaries.last().expect("Invalid fixation boundaries");
148 let mut buffer = VecDeque::with_capacity(*last);
149 // Iterate over the reader
150 for c in reader.bytes() {
151 let c = c? as char;
152 if c.is_ascii_alphabetic() {
153 // A letter
154 state.read += 1;
155 if state.read == 1 {
156 // Start of a word
157 // Write emphasize start
158 writer.write_all(self.emphasize[0].as_bytes())?;
159 } else {
160 // Middle of a word
161 self.try_write(writer, &mut buffer, &mut state)?;
162 }
163 buffer.push_back(c);
164 } else {
165 // Not a letter - special character
166 if state.read != 0 {
167 // End of a word
168 self.try_write(writer, &mut buffer, &mut state)?;
169 // Write emphasize end
170 writer.write_all(self.emphasize[1].as_bytes())?;
171 self.de_emphasize_buffer(writer, &mut buffer)?;
172 state.read = 0;
173 state.written = 0;
174 }
175 // Write the special character
176 writer.write_all(&[c as u8])?;
177 }
178 }
179 // Write the unfinished word
180 if state.read > 0 {
181 // Write emphasize end
182 writer.write_all(self.emphasize[1].as_bytes())?;
183 self.de_emphasize_buffer(writer, &mut buffer)?;
184 }
185 Ok(())
186 }
187 /// Do bio-reading on a piece of text. This is a simple wrapper for processing short strings. If you intend to process large files or work with streams, use [`BioReader::bio_read`] instead.
188 ///
189 /// # Example
190 ///
191 /// ```rust
192 /// use bio_read::BioReader;
193 /// let reader = BioReader::new()
194 /// .emphasize(String::from("<em>"), String::from("</em>"))
195 /// .de_emphasize(String::from("<de>"), String::from("</de>"));
196 /// let output = reader.bio_read_text("hello world").unwrap();
197 /// assert_eq!(output, "<em>hel</em><de>lo</de> <em>wor</em><de>ld</de>");
198 /// ```
199 ///
200 /// # See also
201 ///
202 /// [`BioReader::bio_read`]: Do bio-reading on `reader` and write the result to `writer`.
203 pub fn bio_read_text(&self, text: &str) -> Result<String, std::io::Error> {
204 let mut output_buffer = Vec::new();
205 self.bio_read(text.as_bytes(), &mut output_buffer)?;
206 Ok(String::from_utf8(output_buffer).unwrap())
207 }
208
209 /// Get the fixation boundaries given a fixation point. A word of length `fixation_boundaries[i]` or less will be emphasized except for the last `i` characters. If the word is longer than `fixation_boundaries.last()`, `fixation_boundaries.len()` will be used (one more than the last boundary).
210 fn fixation_boundaries(fixation_point: usize) -> Vec<usize> {
211 match fixation_point - 1 {
212 // `fixation_point` is 1-based
213 // data from https://github.com/Gumball12/text-vide/blob/main/packages/text-vide/src/getFixationLength.ts#L1-L16
214 0 => vec![0, 4, 12, 17, 24, 29, 35, 42, 48],
215 1 => vec![
216 1, 2, 7, 10, 13, 14, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49,
217 ],
218 2 => vec![
219 1, 2, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,
220 45, 47, 49,
221 ],
222 3 => vec![
223 0, 2, 4, 5, 6, 8, 9, 11, 14, 15, 17, 18, 20, 0, 21, 23, 24, 26, 27, 29, 30, 32, 33,
224 35, 36, 38, 39, 41, 42, 44, 45, 47, 48,
225 ],
226 4 => vec![
227 0, 2, 3, 5, 6, 7, 8, 10, 11, 12, 14, 15, 17, 19, 20, 21, 23, 24, 25, 26, 28, 29,
228 30, 32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 44, 46, 47, 48,
229 ],
230 _ => vec![0, 4, 12, 17, 24, 29, 35, 42, 48], // Default to 0
231 }
232 }
233 /// Get the reverse fixation boundaries given a fixation point. A word of length `i` or less will be emphasized except for the last `reverse_fixation_boundaries[i]` characters. If the word is longer than `reverse_fixation_boundaries.len()`, `reverse_fixation_boundaries.last().unwrap() + 1` will be used (one more than the last).
234 fn reverse_fixation_boundaries(fixation_point: usize) -> Vec<usize> {
235 let fixation_boundaries = Self::fixation_boundaries(fixation_point);
236 let last = fixation_boundaries.last().expect("Invalid fixation boundaries");
237 let mut fixation = 0;
238 let mut result = vec![0; *last + 1];
239 for i in 0_usize..=*last {
240 result[i] = fixation;
241 if i >= fixation_boundaries[fixation] {
242 fixation += 1;
243 }
244 }
245 result
246 }
247 /// Get the fixation length from the last character of a word. A word of length `len` or less will be emphasized except for the last `return_value` characters.
248 fn get_fixation_length_from_last(&self, len: usize) -> usize {
249 if len < self.reverse_fixation_boundaries.len() {
250 self.reverse_fixation_boundaries[len]
251 } else {
252 *self.reverse_fixation_boundaries.last().unwrap() + 1 // Longer words default to the last plus one
253 }
254 }
255 /// Write the buffer wrapped with de-emphasize tags
256 fn de_emphasize_buffer(&self, writer: &mut impl Write, buffer: &mut VecDeque<char>) -> std::io::Result<()> {
257 // Skip if the buffer is empty
258 if buffer.is_empty() {
259 return Ok(());
260 }
261 // Write de-emphasize start
262 writer.write_all(self.de_emphasize[0].as_bytes())?;
263 // Write unwritten word characters
264 let to_write = buffer.drain(..).map(|c| c as u8).collect::<Vec<_>>();
265 writer.write_all(&to_write)?;
266 // Write de-emphasize end
267 writer.write_all(self.de_emphasize[1].as_bytes())?;
268 Ok(())
269 }
270 /// Try to write a part of the buffer, with respect to the current state
271 fn try_write(&self, writer: &mut impl Write, buffer: &mut VecDeque<char>, state: &mut State) -> std::io::Result<()> {
272 let fixation_length_from_last = self.get_fixation_length_from_last(state.read);
273 // At least `least_emphasize_length` characters should be emphasized
274 let least_emphasize_length = state.read - fixation_length_from_last;
275 if state.written < least_emphasize_length {
276 // Write word[written, least_emphasize_length], which should be buffer[0, least_emphasize_length - written]
277 let to_write = buffer.drain(0..least_emphasize_length - state.written).map(|c| c as u8).collect::<Vec<_>>();
278 writer.write_all(&to_write)?;
279 state.written = least_emphasize_length;
280 }
281 Ok(())
282 }
283}
284
/// Per-word progress counters kept by [`BioReader::bio_read`] while it streams the input.
/// Both counters refer to the word currently being read.
struct State {
    /// Number of letters of the current word consumed from the input so far.
    read: usize,
    /// Number of letters of the current word already sent to the output.
    written: usize,
}