bio_read/
lib.rs

1//! # Bio Read Library
2//!
3//! The `bio-read` library is an open-source implementation of the Bionic Reading method. Taking inspiration from [text-vide](https://github.com/Gumball12/text-vide/blob/main/HOW.md), this library ports the Bionic Reading method to Rust and provides a CLI for bio-reading text files right from the terminal.
4
5use anstyle::Style;
6use std::{
7    collections::VecDeque,
8    io::{Read, Write},
9};
10
/// A BioReader object, allowing for customizing the bio-reading experience.
///
/// Values are configured through the consuming builder methods [`BioReader::emphasize`],
/// [`BioReader::de_emphasize`] and [`BioReader::fixation_point`].
#[derive(Debug, Clone)]
pub struct BioReader {
    /// The strings to be wrapped around the emphasized part of a word: `[before, after]`.
    emphasize: [String; 2],
    /// The strings to be wrapped around the de-emphasized part of a word: `[before, after]`.
    de_emphasize: [String; 2],
    /// Reverse map of fixation boundaries for quick lookup, indexed by word length. A word of
    /// length `i` is emphasized except for its last `reverse_fixation_boundaries[i]` characters.
    /// If the word length is not a valid index into this table,
    /// `reverse_fixation_boundaries.last().unwrap() + 1` is used instead (one more than the last).
    reverse_fixation_boundaries: Vec<usize>,
}
20
21impl BioReader {
22    /// Create a new BioReader object.
23    pub fn new() -> Self {
24        let bold = Style::new().bold();
25        let dim = Style::new().dimmed();
26        Self {
27            emphasize: [format!("{bold}"), format!("{bold:#}")],
28            de_emphasize: [format!("{dim}"), format!("{dim:#}")],
29            reverse_fixation_boundaries: Self::reverse_fixation_boundaries(3),
30        }
31    }
32
33    /// Set the strings to be wrapped around the emphasized part of a word. Default to bold if environment supports it.
34    ///
35    /// # Example
36    ///
37    /// ```rust
38    /// use bio_read::BioReader;
39    /// let reader = BioReader::new()
40    ///     .emphasize(String::from("<em>"), String::from("</em>"))
41    ///     .de_emphasize(String::from(""), String::from(""));
42    /// assert_eq!(reader.bio_read_text("hello world").unwrap(), "<em>hel</em>lo <em>wor</em>ld");
43    /// ```
44    ///
45    /// # See also
46    ///
47    /// Other methods that can be used to customize the [`BioReader`]:
48    ///
49    /// - [`BioReader::de_emphasize`]
50    /// - [`BioReader::fixation_point`]
51    pub fn emphasize(mut self, left: String, right: String) -> Self {
52        self.emphasize = [left, right];
53        self
54    }
55    /// Set the strings to be wrapped around the de-emphasized part of a word. Default to dimmed if environment supports it.
56    ///
57    /// # Example
58    ///
59    /// ```rust
60    /// use bio_read::BioReader;
61    /// let reader = BioReader::new()
62    ///    .emphasize(String::from(""), String::from(""))
63    ///     .de_emphasize(String::from("<de>"), String::from("</de>"));
64    /// assert_eq!(reader.bio_read_text("hello world").unwrap(), "hel<de>lo</de> wor<de>ld</de>");
65    /// ```
66    ///
67    /// # See also
68    ///
69    /// Other methods that can be used to customize the [`BioReader`]:
70    ///
71    /// - [`BioReader::emphasize`]
72    /// - [`BioReader::fixation_point`]
73    pub fn de_emphasize(mut self, left: String, right: String) -> Self {
74        self.de_emphasize = [left, right];
75        self
76    }
77    /// Set the fixation point. The lower the fixation point, the more characters will be emphasized. The `fixation_point` should be in range \[1, 5\], defaulting to 3 when not specified.
78    ///
79    /// # Example
80    ///
81    /// ```rust
82    /// use bio_read::BioReader;
83    /// let markdownBold = String::from("**");
84    /// let empty = String::from("");
85    /// let reader = BioReader::new()
86    ///     .emphasize(markdownBold.clone(), markdownBold.clone())
87    ///     .de_emphasize(empty.clone(), empty.clone())
88    ///     .fixation_point(1); // Set fixation point to 1
89    /// assert_eq!(reader.bio_read_text("pneumonoultramicroscopicsilicovolcanoconiosis").unwrap(), "**pneumonoultramicroscopicsilicovolcano**coniosis");
90    /// let reader = BioReader::new()
91    ///     .emphasize(markdownBold.clone(), markdownBold.clone())
92    ///     .de_emphasize(empty.clone(), empty.clone())
93    ///     .fixation_point(5); // Set fixation point to 5
94    /// assert_eq!(reader.bio_read_text("pneumonoultramicroscopicsilicovolcanoconiosis").unwrap(), "**pneumonoult**ramicroscopicsilicovolcanoconiosis");
95    /// ```
96    ///
97    /// # Panics
98    ///
99    /// Panics if `fixation_point` is not in range \[1, 5\].
100    ///
101    /// # See also
102    ///
103    /// Other methods that can be used to customize the [`BioReader`]:
104    ///
105    /// - [`BioReader::emphasize`]
106    /// - [`BioReader::de_emphasize`]
107    pub fn fixation_point(mut self, fixation_point: usize) -> Self {
108        assert!(
109            1 <= fixation_point && fixation_point <= 5,
110            "Fixation point should be in range [1, 5], but got {}",
111            fixation_point
112        );
113        self.reverse_fixation_boundaries = Self::reverse_fixation_boundaries(fixation_point);
114        self
115    }
116
117    /// Do bio-reading on `reader` and write the result to `writer`.
118    ///
119    /// # Performance
120    ///
121    /// This method guarantees linear time complexity and constant memory usage.
122    ///
123    /// # Example
124    ///
125    /// ```rust
126    /// use bio_read::BioReader;
127    /// use std::io::Write;
128    /// let reader = BioReader::new()
129    ///     .emphasize(String::from("<em>"), String::from("</em>"))
130    ///     .de_emphasize(String::from("<de>"), String::from("</de>"));
131    /// let mut output_buffer = Vec::new();
132    /// reader.bio_read("hello world".as_bytes(), &mut output_buffer).unwrap();
133    /// let output = String::from_utf8(output_buffer).unwrap();
134    /// assert_eq!(output, "<em>hel</em><de>lo</de> <em>wor</em><de>ld</de>");
135    /// ```
136    ///
137    /// # See also
138    ///
139    /// [`BioReader::bio_read_text`]: A simple wrapper around [`BioReader::bio_read`] for processing short strings.
140    pub fn bio_read(&self, reader: impl Read, writer: &mut impl Write) -> std::io::Result<()> {
141        let mut state = State {
142            read: 0,
143            written: 0,
144        };
145        // The buffer size is at most `self.reverse_fixation_boundaries.last().unwrap()`
146        let rev_boundaries = &self.reverse_fixation_boundaries;
147        let last = rev_boundaries.last().expect("Invalid fixation boundaries");
148        let mut buffer = VecDeque::with_capacity(*last);
149        // Iterate over the reader
150        for c in reader.bytes() {
151            let c = c? as char;
152            if c.is_ascii_alphabetic() {
153                // A letter
154                state.read += 1;
155                if state.read == 1 {
156                    // Start of a word
157                    // Write emphasize start
158                    writer.write_all(self.emphasize[0].as_bytes())?;
159                } else {
160                    // Middle of a word
161                    self.try_write(writer, &mut buffer, &mut state)?;
162                }
163                buffer.push_back(c);
164            } else {
165                // Not a letter - special character
166                if state.read != 0 {
167                    // End of a word
168                    self.try_write(writer, &mut buffer, &mut state)?;
169                    // Write emphasize end
170                    writer.write_all(self.emphasize[1].as_bytes())?;
171                    self.de_emphasize_buffer(writer, &mut buffer)?;
172                    state.read = 0;
173                    state.written = 0;
174                }
175                // Write the special character
176                writer.write_all(&[c as u8])?;
177            }
178        }
179        // Write the unfinished word
180        if state.read > 0 {
181            // Write emphasize end
182            writer.write_all(self.emphasize[1].as_bytes())?;
183            self.de_emphasize_buffer(writer, &mut buffer)?;
184        }
185        Ok(())
186    }
187    /// Do bio-reading on a piece of text. This is a simple wrapper for processing short strings. If you intend to process large files or work with streams, use [`BioReader::bio_read`] instead.
188    ///
189    /// # Example
190    ///
191    /// ```rust
192    /// use bio_read::BioReader;
193    /// let reader = BioReader::new()
194    ///     .emphasize(String::from("<em>"), String::from("</em>"))
195    ///     .de_emphasize(String::from("<de>"), String::from("</de>"));
196    /// let output = reader.bio_read_text("hello world").unwrap();
197    /// assert_eq!(output, "<em>hel</em><de>lo</de> <em>wor</em><de>ld</de>");
198    /// ```
199    ///
200    /// # See also
201    ///
202    /// [`BioReader::bio_read`]: Do bio-reading on `reader` and write the result to `writer`.
203    pub fn bio_read_text(&self, text: &str) -> Result<String, std::io::Error> {
204        let mut output_buffer = Vec::new();
205        self.bio_read(text.as_bytes(), &mut output_buffer)?;
206        Ok(String::from_utf8(output_buffer).unwrap())
207    }
208
209    /// Get the fixation boundaries given a fixation point. A word of length `fixation_boundaries[i]` or less will be emphasized except for the last `i` characters. If the word is longer than `fixation_boundaries.last()`, `fixation_boundaries.len()` will be used (one more than the last boundary).
210    fn fixation_boundaries(fixation_point: usize) -> Vec<usize> {
211        match fixation_point - 1 {
212            // `fixation_point` is 1-based
213            // data from https://github.com/Gumball12/text-vide/blob/main/packages/text-vide/src/getFixationLength.ts#L1-L16
214            0 => vec![0, 4, 12, 17, 24, 29, 35, 42, 48],
215            1 => vec![
216                1, 2, 7, 10, 13, 14, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49,
217            ],
218            2 => vec![
219                1, 2, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,
220                45, 47, 49,
221            ],
222            3 => vec![
223                0, 2, 4, 5, 6, 8, 9, 11, 14, 15, 17, 18, 20, 0, 21, 23, 24, 26, 27, 29, 30, 32, 33,
224                35, 36, 38, 39, 41, 42, 44, 45, 47, 48,
225            ],
226            4 => vec![
227                0, 2, 3, 5, 6, 7, 8, 10, 11, 12, 14, 15, 17, 19, 20, 21, 23, 24, 25, 26, 28, 29,
228                30, 32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 44, 46, 47, 48,
229            ],
230            _ => vec![0, 4, 12, 17, 24, 29, 35, 42, 48], // Default to 0
231        }
232    }
233    /// Get the reverse fixation boundaries given a fixation point. A word of length `i` or less will be emphasized except for the last `reverse_fixation_boundaries[i]` characters. If the word is longer than `reverse_fixation_boundaries.len()`, `reverse_fixation_boundaries.last().unwrap() + 1` will be used (one more than the last).
234    fn reverse_fixation_boundaries(fixation_point: usize) -> Vec<usize> {
235        let fixation_boundaries = Self::fixation_boundaries(fixation_point);
236        let last = fixation_boundaries.last().expect("Invalid fixation boundaries");
237        let mut fixation = 0;
238        let mut result = vec![0; *last + 1];
239        for i in 0_usize..=*last {
240            result[i] = fixation;
241            if i >= fixation_boundaries[fixation] {
242                fixation += 1;
243            }
244        }
245        result
246    }
247    /// Get the fixation length from the last character of a word. A word of length `len` or less will be emphasized except for the last `return_value` characters.
248    fn get_fixation_length_from_last(&self, len: usize) -> usize {
249        if len < self.reverse_fixation_boundaries.len() {
250            self.reverse_fixation_boundaries[len]
251        } else {
252            *self.reverse_fixation_boundaries.last().unwrap() + 1 // Longer words default to the last plus one
253        }
254    }
255    /// Write the buffer wrapped with de-emphasize tags
256    fn de_emphasize_buffer(&self, writer: &mut impl Write, buffer: &mut VecDeque<char>) -> std::io::Result<()> {
257        // Skip if the buffer is empty
258        if buffer.is_empty() {
259            return Ok(());
260        }
261        // Write de-emphasize start
262        writer.write_all(self.de_emphasize[0].as_bytes())?;
263        // Write unwritten word characters
264        let to_write = buffer.drain(..).map(|c| c as u8).collect::<Vec<_>>();
265        writer.write_all(&to_write)?;
266        // Write de-emphasize end
267        writer.write_all(self.de_emphasize[1].as_bytes())?;
268        Ok(())
269    }
270    /// Try to write a part of the buffer, with respect to the current state
271    fn try_write(&self, writer: &mut impl Write, buffer: &mut VecDeque<char>, state: &mut State) -> std::io::Result<()> {
272        let fixation_length_from_last = self.get_fixation_length_from_last(state.read);
273        // At least `least_emphasize_length` characters should be emphasized
274        let least_emphasize_length = state.read - fixation_length_from_last;
275        if state.written < least_emphasize_length {
276            // Write word[written, least_emphasize_length], which should be buffer[0, least_emphasize_length - written]
277            let to_write = buffer.drain(0..least_emphasize_length - state.written).map(|c| c as u8).collect::<Vec<_>>();
278            writer.write_all(&to_write)?;
279            state.written = least_emphasize_length;
280        }
281        Ok(())
282    }
283}
284
/// Per-word progress counters. Used internally for [`BioReader::bio_read`].
struct State {
    /// Number of letters of the current word that have been read so far.
    read: usize,
    /// Number of letters of the current word that have been written so far.
    written: usize,
}