rdx_transform/transforms/
abbreviation.rs1use rdx_ast::*;
2
3use crate::{Transform, synthetic_pos};
4
/// Transform that wraps the first occurrence of each configured abbreviation
/// in an inline `Abbr` component.
///
/// The abbreviation table is read from the `abbreviations` object of the
/// document frontmatter; only entries whose value is a string are used.
/// The type carries no state, so it is cheap to copy and has a trivial
/// default value.
#[derive(Debug, Clone, Copy, Default)]
pub struct AbbreviationExpand;
37
38impl Transform for AbbreviationExpand {
39 fn name(&self) -> &str {
40 "abbreviation-expand"
41 }
42
43 fn transform(&self, root: &mut Root, _source: &str) {
44 let abbreviations = match root.frontmatter.as_ref() {
46 Some(fm) => match fm.get("abbreviations") {
47 Some(serde_json::Value::Object(map)) => map
48 .iter()
49 .filter_map(|(k, v)| {
50 if let serde_json::Value::String(expansion) = v {
51 Some((k.clone(), expansion.clone()))
52 } else {
53 None
54 }
55 })
56 .collect::<Vec<_>>(),
57 _ => return,
58 },
59 None => return,
60 };
61
62 if abbreviations.is_empty() {
63 return;
64 }
65
66 let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
68
69 expand_nodes(&mut root.children, &abbreviations, &mut seen);
70 }
71}
72
73fn make_abbr_component(abbr: &str, expansion: &str) -> Node {
79 Node::Component(ComponentNode {
80 name: "Abbr".to_string(),
81 is_inline: true,
82 attributes: vec![AttributeNode {
83 name: "title".to_string(),
84 value: AttributeValue::String(expansion.to_string()),
85 position: synthetic_pos(),
86 }],
87 children: vec![Node::Text(TextNode {
88 value: abbr.to_string(),
89 position: synthetic_pos(),
90 })],
91 raw_content: String::new(),
92 position: synthetic_pos(),
93 })
94}
95
96fn split_on_first_abbr(
106 text: &str,
107 abbreviations: &[(String, String)],
108 seen: &std::collections::HashSet<String>,
109) -> Option<(String, Vec<Node>)> {
110 let mut best: Option<(usize, usize, &str, &str)> = None; for (abbr, expansion) in abbreviations {
115 if seen.contains(abbr.as_str()) {
116 continue;
117 }
118 if let Some(pos) = text.find(abbr.as_str()) {
119 let end = pos + abbr.len();
120 let is_better = match best {
121 None => true,
122 Some((best_start, best_end, _, _)) => {
123 pos < best_start || (pos == best_start && end > best_end)
124 }
125 };
126 if is_better {
127 best = Some((pos, end, abbr.as_str(), expansion.as_str()));
128 }
129 }
130 }
131
132 let (start, end, abbr, expansion) = best?;
133
134 let mut nodes: Vec<Node> = Vec::new();
135 if start > 0 {
136 nodes.push(Node::Text(TextNode {
137 value: text[..start].to_string(),
138 position: synthetic_pos(),
139 }));
140 }
141 nodes.push(make_abbr_component(abbr, expansion));
142 if end < text.len() {
143 nodes.push(Node::Text(TextNode {
144 value: text[end..].to_string(),
145 position: synthetic_pos(),
146 }));
147 }
148
149 Some((abbr.to_string(), nodes))
150}
151
152fn expand_nodes(
159 nodes: &mut Vec<Node>,
160 abbreviations: &[(String, String)],
161 seen: &mut std::collections::HashSet<String>,
162) {
163 let mut i = 0;
164 while i < nodes.len() {
165 let expansion_result = if let Node::Text(ref t) = nodes[i] {
166 split_on_first_abbr(&t.value, abbreviations, seen)
167 } else {
168 None
169 };
170
171 if let Some((matched_abbr, replacement_nodes)) = expansion_result {
172 seen.insert(matched_abbr);
173 let how_many = replacement_nodes.len();
174 nodes.splice(i..=i, replacement_nodes);
176 if how_many > 0 {
181 i += how_many - 1;
182 }
183 } else {
184 if let Some(children) = nodes[i].children_mut() {
186 expand_nodes(children, abbreviations, seen);
187 }
188 i += 1;
189 }
190 }
191}
192
#[cfg(test)]
mod tests {
    use super::*;
    use rdx_parser::parse;

    /// Parses a document whose frontmatter may declare abbreviations.
    fn parse_with_abbrevs(input: &str) -> Root {
        parse(input)
    }

    /// Counts the `Abbr` components visited by `crate::walk` under `root`.
    fn count_abbr_components(root: &Root) -> usize {
        let mut total = 0;
        crate::walk(&root.children, &mut |node| {
            if matches!(node, Node::Component(c) if c.name == "Abbr") {
                total += 1;
            }
        });
        total
    }

    #[test]
    fn no_frontmatter_is_noop() {
        let mut root = parse("HTML is great.\n");
        AbbreviationExpand.transform(&mut root, "");

        let first = &root.children[0];
        let Node::Paragraph(paragraph) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };
        assert!(
            !paragraph
                .children
                .iter()
                .any(|child| matches!(child, Node::Component(_))),
            "Should have no Abbr components without frontmatter"
        );
    }

    #[test]
    fn first_occurrence_wrapped() {
        let input = "---\nabbreviations:\n  HTML: HyperText Markup Language\n---\nHTML is a language. HTML again.\n";
        let mut root = parse_with_abbrevs(input);
        AbbreviationExpand.transform(&mut root, "");

        let mut abbr_count = 0;
        crate::walk(&root.children, &mut |node| {
            let Node::Component(component) = node else {
                return;
            };
            if component.name != "Abbr" {
                return;
            }
            abbr_count += 1;

            // First `title` attribute that carries a string value.
            let title = component
                .attributes
                .iter()
                .filter(|attr| attr.name == "title")
                .find_map(|attr| match &attr.value {
                    AttributeValue::String(s) => Some(s.as_str()),
                    _ => None,
                });
            assert_eq!(
                title,
                Some("HyperText Markup Language"),
                "Abbr title should be the expansion"
            );
        });
        assert_eq!(abbr_count, 1, "Only the first occurrence should be wrapped");
    }

    #[test]
    fn second_occurrence_left_as_text() {
        let input =
            "---\nabbreviations:\n  CSS: Cascading Style Sheets\n---\nCSS rules. CSS is cool.\n";
        let mut root = parse_with_abbrevs(input);
        AbbreviationExpand.transform(&mut root, "");

        let abbr_count = count_abbr_components(&root);
        assert_eq!(
            abbr_count, 1,
            "Should wrap only the first occurrence of CSS"
        );
    }

    #[test]
    fn multiple_abbreviations_each_first_wrapped() {
        let input = "---\nabbreviations:\n  HTML: HyperText Markup Language\n  CSS: Cascading Style Sheets\n---\nHTML and CSS and HTML and CSS.\n";
        let mut root = parse_with_abbrevs(input);
        AbbreviationExpand.transform(&mut root, "");

        let abbr_count = count_abbr_components(&root);
        assert_eq!(
            abbr_count, 2,
            "First HTML and first CSS should each be wrapped once"
        );
    }

    #[test]
    fn abbreviation_not_in_text_is_noop() {
        let input =
            "---\nabbreviations:\n  XML: Extensible Markup Language\n---\nNo abbreviations here.\n";
        let mut root = parse_with_abbrevs(input);
        AbbreviationExpand.transform(&mut root, "");

        let has_abbr = count_abbr_components(&root) > 0;
        assert!(
            !has_abbr,
            "No Abbr component should be created when abbreviation isn't in text"
        );
    }

    #[test]
    fn abbr_component_has_correct_children() {
        let input = "---\nabbreviations:\n  API: Application Programming Interface\n---\nThe API endpoint.\n";
        let mut root = parse_with_abbrevs(input);
        AbbreviationExpand.transform(&mut root, "");

        let mut found_abbr = false;
        crate::walk(&root.children, &mut |node| {
            let Node::Component(component) = node else {
                return;
            };
            if component.name != "Abbr" {
                return;
            }
            found_abbr = true;
            match component.children.first() {
                Some(Node::Text(t)) => assert_eq!(t.value, "API"),
                other => panic!("Expected Text child in Abbr, got {:?}", other),
            }
        });
        assert!(found_abbr, "Should have found an Abbr component");
    }

    #[test]
    fn empty_abbreviations_map_is_noop() {
        let input = "---\nabbreviations: {}\n---\nSome text.\n";
        let mut root = parse_with_abbrevs(input);
        AbbreviationExpand.transform(&mut root, "");

        let has_abbr = count_abbr_components(&root) > 0;
        assert!(!has_abbr);
    }
}