bio_read/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//! # Bio Read Library
//!
//! The `bio-read` library is an open-source implementation of the Bionic Reading method. Taking inspiration from [text-vide](https://github.com/Gumball12/text-vide/blob/main/HOW.md) and [a bionic reading userscript](https://github.com/yitong2333/Bionic-Reading/blob/main/%E4%BB%BF%E7%94%9F%E9%98%85%E8%AF%BB(Bionic%20Reading)-1.6.user.js), this library ports the Bionic Reading method to Rust and provides a CLI for bio-reading text files right from the terminal.

use colored::Colorize;
use std::collections::HashSet;

/// A BioReader object, allowing for customizing the bio-reading experience.
///
/// It stores the two formatting callbacks applied to the emphasized and de-emphasized parts of a
/// word, the fixation boundary table used to decide where a word is split, and the set of common
/// words that only get their first letter emphasized.
pub struct BioReader {
    /// The function to emphasize part of a word. Default is bold.
    emphasize: fn(&str) -> String,
    /// The function to de-emphasize part of a word. Default is dimmed.
    de_emphasize: fn(&str) -> String,
    /// Fixation boundary list. A word of length `fixation_boundaries[i]` or less will be emphasized except for the last `i` characters. If the word is longer than `fixation_boundaries.last()`, `fixation_boundaries.len()` will be used (one more than the last index).
    fixation_boundaries: Vec<usize>,
    // The fixation point itself (range [1, 5], default 3) is not stored as a field; only the
    // boundary list derived from it is kept in `fixation_boundaries`.
    /// Common words. Only the first letter of these words will be emphasized.
    common_words: HashSet<String>,
}

impl BioReader {
    /// Create a new BioReader object.
    pub fn new() -> Self {
        Self {
            emphasize: |s| s.bold().to_string(),
            de_emphasize: |s| s.dimmed().to_string(),
            fixation_boundaries: Self::fixation_boundaries(3),
            common_words: [
                // https://github.com/yitong2333/Bionic-Reading/blob/acaecfc852f9778a58af89863b80b56bcd4eb637/%E4%BB%BF%E7%94%9F%E9%98%85%E8%AF%BB(Bionic%20Reading)-1.6.user.js#L33-L38
                "the", "and", "in", "on", "at", "by", "with", "about", "against", "between", "into",
                "through", "during", "before", "after", "above", "below", "to", "from", "up",
                "down", "over", "under", "again", "further", "then", "once", "here", "there",
                "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most",
                "other", "some",
            ]
            .iter()
            .map(|s| s.to_string())
            .collect(),
        }
    }

    /// Set the function to emphasize part of a word. Default to bold if environment supports it.
    ///
    /// # Example
    ///
    /// ```rust
    /// use bio_read::BioReader;
    /// let reader = BioReader::new().emphasize(|s| format!("**{s}**")); // Emphasize by wrapping with `**`
    /// assert_eq!(reader.bio_read_text("hello world"), "**hel**lo **wor**ld");
    /// ```
    ///
    /// # See also
    ///
    /// Other methods that can be used to customize the [`BioReader`]:
    ///
    /// - [`BioReader::de_emphasize`]
    /// - [`BioReader::fixation_point`]
    pub fn emphasize(mut self, f: fn(&str) -> String) -> Self {
        self.emphasize = f;
        self
    }
    /// Set the function to de-emphasize part of a word. Default to dimmed if environment supports it.
    ///
    /// # Example
    ///
    /// ```rust
    /// use bio_read::BioReader;
    /// let reader = BioReader::new().de_emphasize(|s| format!("_{s}_")); // De-emphasize by wrapping with `_`
    /// assert_eq!(reader.bio_read_text("hello world"), "hel_lo_ wor_ld_");
    /// ```
    ///
    /// # See also
    ///
    /// Other methods that can be used to customize the [`BioReader`]:
    ///
    /// - [`BioReader::emphasize`]
    /// - [`BioReader::fixation_point`]
    pub fn de_emphasize(mut self, f: fn(&str) -> String) -> Self {
        self.de_emphasize = f;
        self
    }
    /// Set the fixation point. The lower the fixation point, the more characters will be emphasized. The `fixation_point` should be in range \[1, 5\], defaulting to 3 when not specified.
    ///
    /// # Example
    ///
    /// ```rust
    /// use bio_read::BioReader;
    /// let reader = BioReader::new()
    ///     .emphasize(|s| format!("**{s}**"))
    ///     .fixation_point(1); // Set fixation point to 1
    /// assert_eq!(reader.bio_read_word("pneumonoultramicroscopicsilicovolcanoconiosis"), "**pneumonoultramicroscopicsilicovolcano**coniosis");
    /// let reader = BioReader::new()
    ///     .emphasize(|s| format!("**{s}**"))
    ///     .fixation_point(5); // Set fixation point to 5
    /// assert_eq!(reader.bio_read_word("pneumonoultramicroscopicsilicovolcanoconiosis"), "**pneumonoult**ramicroscopicsilicovolcanoconiosis");
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if `fixation_point` is not in range \[1, 5\].
    ///
    /// # See also
    ///
    /// Other methods that can be used to customize the [`BioReader`]:
    ///
    /// - [`BioReader::emphasize`]
    /// - [`BioReader::de_emphasize`]
    pub fn fixation_point(mut self, fixation_point: usize) -> Self {
        assert!(
            1 <= fixation_point && fixation_point <= 5,
            "Fixation point should be in range [1, 5], but got {}",
            fixation_point
        );
        self.fixation_boundaries = Self::fixation_boundaries(fixation_point);
        self
    }

    /// Do bio-reading on a word.
    ///
    /// # See also
    ///
    /// [`BioReader::bio_read_text`]: Do bio-reading on a piece of text.
    pub fn bio_read_word(&self, word: &str) -> String {
        if self.common_words.contains(&word.to_lowercase()) {
            return format!(
                "{}{}",
                (self.emphasize)(&word[..1]),
                (self.de_emphasize)(&word[1..])
            );
        }
        let len = word.len();
        let fixation_boundaries = &self.fixation_boundaries;
        let fixation_length_from_last = fixation_boundaries
            .iter()
            .enumerate() // (index, value), representing (boundary, length)
            .find(|(_, length)| len <= **length) // Find the first boundary that is larger than the word length
            .map_or(fixation_boundaries.len(), |(boundary, _)| boundary); // If not found, use the last boundary
        let fixation_boundary = word.len() - fixation_length_from_last;
        let (prefix, suffix) = word.split_at(fixation_boundary);
        format!(
            "{}{}",
            (self.emphasize)(prefix),
            (self.de_emphasize)(suffix)
        )
    }
    /// Do bio-reading on a piece of text.
    ///
    /// # See also
    ///
    /// [`BioReader::bio_read_word`]: Do bio-reading on a word.
    pub fn bio_read_text(&self, text: &str) -> String {
        let mut result = String::with_capacity(text.len());
        let mut word = String::new();
        for c in text.chars() {
            if c.is_ascii_alphabetic() {
                // A letter
                word.push(c);
            } else {
                // Not a letter - separator
                if !word.is_empty() {
                    result.push_str(&self.bio_read_word(&word));
                    word.clear();
                }
                result.push(c);
            }
        }
        if !word.is_empty() {
            // In case the text ends with a word
            result.push_str(&self.bio_read_word(&word));
        }
        result
    }

    /// Get the fixation boundaries given a fixation point.
    fn fixation_boundaries(fixation_point: usize) -> Vec<usize> {
        match fixation_point - 1 {
            // `fixation_point` is 1-based
            // data from https://github.com/Gumball12/text-vide/blob/main/packages/text-vide/src/getFixationLength.ts#L1-L16
            0 => vec![0, 4, 12, 17, 24, 29, 35, 42, 48],
            1 => vec![
                1, 2, 7, 10, 13, 14, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49,
            ],
            2 => vec![
                1, 2, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,
                45, 47, 49,
            ],
            3 => vec![
                0, 2, 4, 5, 6, 8, 9, 11, 14, 15, 17, 18, 20, 0, 21, 23, 24, 26, 27, 29, 30, 32, 33,
                35, 36, 38, 39, 41, 42, 44, 45, 47, 48,
            ],
            4 => vec![
                0, 2, 3, 5, 6, 7, 8, 10, 11, 12, 14, 15, 17, 19, 20, 21, 23, 24, 25, 26, 28, 29,
                30, 32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 44, 46, 47, 48,
            ],
            _ => vec![0, 4, 12, 17, 24, 29, 35, 42, 48], // Default to 0
        }
    }
}