lib/process/pre.rs
1//! Defines the pre-processor type.
2//!
3//! Pre-processors are used to mutate fields within an [`Entry`].
4
5use crate::models::entry::{Entries, Entry};
6use crate::strings;
7
/// A struct for pre-processing [`Entry`]s.
///
/// This is a stateless unit type: it carries no data and its functionality is exposed
/// through associated functions (for example [`PreProcessor::run`]) rather than through
/// methods on an instance.
#[derive(Debug, Clone, Copy)]
pub struct PreProcessor;
11
12impl PreProcessor {
13 /// Runs all pre-strings on an [`Entry`].
14 ///
15 /// # Arguments
16 ///
17 /// * `entry` - The [`Entry`]s to process.
18 /// * `options` - The pre-process options.
19 pub fn run<O>(entries: &mut Entries, options: O)
20 where
21 O: Into<PreProcessOptions>,
22 {
23 let options: PreProcessOptions = options.into();
24
25 for entry in entries.values_mut() {
26 Self::sort_annotations(entry);
27
28 if options.extract_tags {
29 Self::extract_tags(entry);
30 }
31
32 if options.normalize_whitespace {
33 Self::normalize_whitespace(entry);
34 }
35
36 if options.convert_all_to_ascii {
37 Self::convert_all_to_ascii(entry);
38 }
39
40 if options.convert_symbols_to_ascii {
41 Self::convert_symbols_to_ascii(entry);
42 }
43 }
44 }
45
46 /// Sort annotations by [`AnnotationMetadata::location`][location].
47 ///
48 /// # Arguments
49 ///
50 /// * `entry` - The [`Entry`] to process.
51 ///
52 /// [location]: crate::models::annotation::AnnotationMetadata::location
53 pub fn sort_annotations(entry: &mut Entry) {
54 entry.annotations.sort();
55 }
56
57 /// Extracts `#tags` from [`Annotation::notes`][annotation-notes] and places
58 /// them into [`Annotation::tags`][annotation-tags]. The `#tags` are removed from
59 /// [`Annotation::notes`][annotation-notes].
60 ///
61 /// # Arguments
62 ///
63 /// * `entry` - The [`Entry`] to process.
64 ///
65 /// [annotation-notes]: crate::models::annotation::Annotation::notes
66 /// [annotation-tags]: crate::models::annotation::Annotation::tags
67 fn extract_tags(entry: &mut Entry) {
68 for annotation in &mut entry.annotations {
69 annotation.tags = strings::extract_tags(&annotation.notes);
70 annotation.notes = strings::remove_tags(&annotation.notes);
71 }
72 }
73
74 /// Normalizes whitespace in [`Annotation::body`][body].
75 ///
76 /// # Arguments
77 ///
78 /// * `entry` - The [`Entry`] to process.
79 ///
80 /// [body]: crate::models::annotation::Annotation::body
81 fn normalize_whitespace(entry: &mut Entry) {
82 for annotation in &mut entry.annotations {
83 annotation.body = strings::normalize_whitespace(&annotation.body);
84 }
85 }
86
87 /// Converts all Unicode characters found in [`Annotation::body`][body], [`Book::title`][title]
88 /// and [`Book::author`][author] to their ASCII equivalents.
89 ///
90 /// # Arguments
91 ///
92 /// * `entry` - The [`Entry`] to process.
93 ///
94 /// [author]: crate::models::book::Book::author
95 /// [body]: crate::models::annotation::Annotation::body
96 /// [title]: crate::models::book::Book::title
97 fn convert_all_to_ascii(entry: &mut Entry) {
98 entry.book.title = strings::convert_all_to_ascii(&entry.book.title);
99 entry.book.author = strings::convert_all_to_ascii(&entry.book.author);
100
101 for annotation in &mut entry.annotations {
102 annotation.body = strings::convert_all_to_ascii(&annotation.body);
103 }
104 }
105
106 /// Converts a subset of "smart" Unicode symbols found in [`Annotation::body`][body],
107 /// [`Book::title`][title] and [`Book::author`][author] to their ASCII equivalents.
108 ///
109 /// # Arguments
110 ///
111 /// * `entry` - The [`Entry`] to process.
112 ///
113 /// [author]: crate::models::book::Book::author
114 /// [body]: crate::models::annotation::Annotation::body
115 /// [title]: crate::models::book::Book::title
116 fn convert_symbols_to_ascii(entry: &mut Entry) {
117 entry.book.title = strings::convert_symbols_to_ascii(&entry.book.title);
118 entry.book.author = strings::convert_symbols_to_ascii(&entry.book.author);
119
120 for annotation in &mut entry.annotations {
121 annotation.body = strings::convert_symbols_to_ascii(&annotation.body);
122 }
123 }
124}
125
/// A struct representing options for the [`PreProcessor`] struct.
///
/// Each field switches one optional pre-processing step on or off. The [`Default`] value
/// has every flag set to `false`, so callers can enable only the steps they need:
///
/// ```ignore
/// let options = PreProcessOptions {
///     extract_tags: true,
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
// Every flag is an independent on/off switch for a single step, so a plain struct of
// booleans is the most direct model here.
#[allow(clippy::struct_excessive_bools)]
pub struct PreProcessOptions {
    /// Toggles running `#tag` extraction from notes.
    pub extract_tags: bool,

    /// Toggles running whitespace normalization.
    pub normalize_whitespace: bool,

    /// Toggles converting all Unicode characters to ASCII.
    pub convert_all_to_ascii: bool,

    /// Toggles converting "smart" Unicode symbols to ASCII.
    pub convert_symbols_to_ascii: bool,
}
142
#[cfg(test)]
mod test {

    use super::*;

    mod tags {

        use super::*;

        use crate::models::annotation::Annotation;
        use crate::models::book::Book;

        // Each annotation's notes consist of exactly two `#tags` and nothing else, so after
        // extraction every annotation must hold two tags and be left with empty notes.
        #[test]
        fn extract() {
            let annotations: Vec<Annotation> = ["#tag01 #tag02", "#tag02 #tag03", "#tag03 #tag01"]
                .iter()
                .map(|&notes| Annotation {
                    notes: notes.to_string(),
                    ..Default::default()
                })
                .collect();

            let mut entry = Entry {
                book: Book::default(),
                annotations,
            };

            PreProcessor::extract_tags(&mut entry);

            for annotation in &entry.annotations {
                assert_eq!(annotation.tags.len(), 2);
                assert!(annotation.notes.is_empty());
            }
        }
    }
}