lib/process/
pre.rs

1//! Defines the pre-processor type.
2//!
3//! Pre-processors are used to mutate fields within an [`Entry`].
4
5use crate::models::entry::{Entries, Entry};
6use crate::strings;
7
/// Stateless handle that groups the pre-processing routines for [`Entry`]s.
///
/// The type holds no data; it only serves as a namespace for its associated functions.
#[derive(Clone, Copy, Debug)]
pub struct PreProcessor;
11
12impl PreProcessor {
13    /// Runs all pre-strings on an [`Entry`].
14    ///
15    /// # Arguments
16    ///
17    /// * `entry` - The [`Entry`]s to process.
18    /// * `options` - The pre-process options.
19    pub fn run<O>(entries: &mut Entries, options: O)
20    where
21        O: Into<PreProcessOptions>,
22    {
23        let options: PreProcessOptions = options.into();
24
25        for entry in entries.values_mut() {
26            Self::sort_annotations(entry);
27
28            if options.extract_tags {
29                Self::extract_tags(entry);
30            }
31
32            if options.normalize_whitespace {
33                Self::normalize_whitespace(entry);
34            }
35
36            if options.convert_all_to_ascii {
37                Self::convert_all_to_ascii(entry);
38            }
39
40            if options.convert_symbols_to_ascii {
41                Self::convert_symbols_to_ascii(entry);
42            }
43        }
44    }
45
46    /// Sort annotations by [`AnnotationMetadata::location`][location].
47    ///
48    /// # Arguments
49    ///
50    /// * `entry` - The [`Entry`] to process.
51    ///
52    /// [location]: crate::models::annotation::AnnotationMetadata::location
53    pub fn sort_annotations(entry: &mut Entry) {
54        entry.annotations.sort();
55    }
56
57    /// Extracts `#tags` from [`Annotation::notes`][annotation-notes] and places
58    /// them into [`Annotation::tags`][annotation-tags]. The `#tags` are removed from
59    /// [`Annotation::notes`][annotation-notes].
60    ///
61    /// # Arguments
62    ///
63    /// * `entry` - The [`Entry`] to process.
64    ///
65    /// [annotation-notes]: crate::models::annotation::Annotation::notes
66    /// [annotation-tags]: crate::models::annotation::Annotation::tags
67    fn extract_tags(entry: &mut Entry) {
68        for annotation in &mut entry.annotations {
69            annotation.tags = strings::extract_tags(&annotation.notes);
70            annotation.notes = strings::remove_tags(&annotation.notes);
71        }
72    }
73
74    /// Normalizes whitespace in [`Annotation::body`][body].
75    ///
76    /// # Arguments
77    ///
78    /// * `entry` - The [`Entry`] to process.
79    ///
80    /// [body]: crate::models::annotation::Annotation::body
81    fn normalize_whitespace(entry: &mut Entry) {
82        for annotation in &mut entry.annotations {
83            annotation.body = strings::normalize_whitespace(&annotation.body);
84        }
85    }
86
87    /// Converts all Unicode characters found in [`Annotation::body`][body], [`Book::title`][title]
88    /// and [`Book::author`][author] to their ASCII equivalents.
89    ///
90    /// # Arguments
91    ///
92    /// * `entry` - The [`Entry`] to process.
93    ///
94    /// [author]: crate::models::book::Book::author
95    /// [body]: crate::models::annotation::Annotation::body
96    /// [title]: crate::models::book::Book::title
97    fn convert_all_to_ascii(entry: &mut Entry) {
98        entry.book.title = strings::convert_all_to_ascii(&entry.book.title);
99        entry.book.author = strings::convert_all_to_ascii(&entry.book.author);
100
101        for annotation in &mut entry.annotations {
102            annotation.body = strings::convert_all_to_ascii(&annotation.body);
103        }
104    }
105
106    /// Converts a subset of "smart" Unicode symbols found in [`Annotation::body`][body],
107    /// [`Book::title`][title] and [`Book::author`][author] to their ASCII equivalents.
108    ///
109    /// # Arguments
110    ///
111    /// * `entry` - The [`Entry`] to process.
112    ///
113    /// [author]: crate::models::book::Book::author
114    /// [body]: crate::models::annotation::Annotation::body
115    /// [title]: crate::models::book::Book::title
116    fn convert_symbols_to_ascii(entry: &mut Entry) {
117        entry.book.title = strings::convert_symbols_to_ascii(&entry.book.title);
118        entry.book.author = strings::convert_symbols_to_ascii(&entry.book.author);
119
120        for annotation in &mut entry.annotations {
121            annotation.body = strings::convert_symbols_to_ascii(&annotation.body);
122        }
123    }
124}
125
/// Switches selecting which optional steps [`PreProcessor`] performs.
///
/// Each field enables exactly one step; the fields are independent of one another.
#[derive(Clone, Copy, Debug)]
// The flags are independent on/off toggles rather than states of a single machine, so
// modelling them as an enum would not fit.
#[allow(clippy::struct_excessive_bools)]
pub struct PreProcessOptions {
    /// Enables extracting `#tags` from annotation notes.
    pub extract_tags: bool,

    /// Enables whitespace normalization.
    pub normalize_whitespace: bool,

    /// Enables converting every Unicode character to ASCII.
    pub convert_all_to_ascii: bool,

    /// Enables converting "smart" Unicode symbols to ASCII.
    pub convert_symbols_to_ascii: bool,
}
142
#[cfg(test)]
mod test {

    use super::*;

    mod tags {

        use super::*;

        use crate::models::annotation::Annotation;
        use crate::models::book::Book;

        /// Builds an otherwise-default [`Annotation`] carrying the given notes.
        fn annotation_with_notes(notes: &str) -> Annotation {
            Annotation {
                notes: notes.to_string(),
                ..Default::default()
            }
        }

        // Verifies that tag extraction fills `Annotation::tags` from `Annotation::notes` and
        // leaves the notes empty when they contained nothing but tags.
        #[test]
        fn extract() {
            let annotations: Vec<Annotation> = ["#tag01 #tag02", "#tag02 #tag03", "#tag03 #tag01"]
                .iter()
                .map(|notes| annotation_with_notes(notes))
                .collect();

            let mut entry = Entry {
                book: Book::default(),
                annotations,
            };

            PreProcessor::extract_tags(&mut entry);

            for annotation in &entry.annotations {
                assert_eq!(annotation.tags.len(), 2);
                assert!(annotation.notes.is_empty());
            }
        }
    }
}
185}