Skip to main content

plotkit_core/
text.rs

1//! Inline text markup for super- and subscripts.
2//!
3//! Labels, titles, and annotations support a small, matplotlib-flavoured markup:
4//!
5//! - `^` raises the next character (or a `^{...}` group) into a **superscript**.
6//! - `_` lowers the next character (or a `_{...}` group) into a **subscript**.
7//! - `\^`, `\_`, and `\\` are escapes that emit a literal `^`, `_`, or `\`.
8//!
9//! The markup is resolved to Unicode super/subscript code points, so it renders
10//! identically across every backend (PNG, SVG, PDF, WASM) with no special glyph
11//! handling. Characters that have no Unicode super/subscript form are left as-is.
12//!
13//! ```
14//! use plotkit_core::text::format_markup;
15//!
16//! assert_eq!(format_markup("x^2"), "x²");
17//! assert_eq!(format_markup("H_2O"), "H₂O");
18//! assert_eq!(format_markup("10^{-3}"), "10⁻³");
19//! assert_eq!(format_markup("5 \\^ 2"), "5 ^ 2"); // escaped, left literal
20//! ```
21
/// Looks up the Unicode superscript counterpart of `c`.
///
/// The table covers the ASCII digits, the symbols `+ - = ( )`, and every
/// lowercase Latin letter except `q`. Any other character yields `None`.
fn superscript_char(c: char) -> Option<char> {
    // (plain, raised) pairs; scanned linearly since the table is tiny.
    const RAISED: &[(char, char)] = &[
        ('0', '\u{2070}'),
        ('1', '\u{00B9}'),
        ('2', '\u{00B2}'),
        ('3', '\u{00B3}'),
        ('4', '\u{2074}'),
        ('5', '\u{2075}'),
        ('6', '\u{2076}'),
        ('7', '\u{2077}'),
        ('8', '\u{2078}'),
        ('9', '\u{2079}'),
        ('+', '\u{207A}'),
        ('-', '\u{207B}'),
        ('=', '\u{207C}'),
        ('(', '\u{207D}'),
        (')', '\u{207E}'),
        ('a', 'ᵃ'),
        ('b', 'ᵇ'),
        ('c', 'ᶜ'),
        ('d', 'ᵈ'),
        ('e', 'ᵉ'),
        ('f', 'ᶠ'),
        ('g', 'ᵍ'),
        ('h', 'ʰ'),
        ('i', 'ⁱ'),
        ('j', 'ʲ'),
        ('k', 'ᵏ'),
        ('l', 'ˡ'),
        ('m', 'ᵐ'),
        ('n', 'ⁿ'),
        ('o', 'ᵒ'),
        ('p', 'ᵖ'),
        ('r', 'ʳ'),
        ('s', 'ˢ'),
        ('t', 'ᵗ'),
        ('u', 'ᵘ'),
        ('v', 'ᵛ'),
        ('w', 'ʷ'),
        ('x', 'ˣ'),
        ('y', 'ʸ'),
        ('z', 'ᶻ'),
    ];

    RAISED
        .iter()
        .find(|&&(plain, _)| plain == c)
        .map(|&(_, raised)| raised)
}
68
/// Looks up the Unicode subscript counterpart of `c`.
///
/// The table covers the ASCII digits, the symbols `+ - = ( )`, and the
/// lowercase letters `a e h i j k l m n o p r s t u v x`. Any other
/// character yields `None`.
fn subscript_char(c: char) -> Option<char> {
    // (plain, lowered) pairs; scanned linearly since the table is tiny.
    const LOWERED: &[(char, char)] = &[
        ('0', '\u{2080}'),
        ('1', '\u{2081}'),
        ('2', '\u{2082}'),
        ('3', '\u{2083}'),
        ('4', '\u{2084}'),
        ('5', '\u{2085}'),
        ('6', '\u{2086}'),
        ('7', '\u{2087}'),
        ('8', '\u{2088}'),
        ('9', '\u{2089}'),
        ('+', '\u{208A}'),
        ('-', '\u{208B}'),
        ('=', '\u{208C}'),
        ('(', '\u{208D}'),
        (')', '\u{208E}'),
        ('a', 'ₐ'),
        ('e', 'ₑ'),
        ('h', 'ₕ'),
        ('i', 'ᵢ'),
        ('j', 'ⱼ'),
        ('k', 'ₖ'),
        ('l', 'ₗ'),
        ('m', 'ₘ'),
        ('n', 'ₙ'),
        ('o', 'ₒ'),
        ('p', 'ₚ'),
        ('r', 'ᵣ'),
        ('s', 'ₛ'),
        ('t', 'ₜ'),
        ('u', 'ᵤ'),
        ('v', 'ᵥ'),
        ('x', 'ₓ'),
    ];

    LOWERED
        .iter()
        .find(|&&(plain, _)| plain == c)
        .map(|&(_, lowered)| lowered)
}
107
108/// Resolves super/subscript markup in `input` to Unicode. See the [module
109/// docs](self) for the syntax. Strings without `^`, `_`, or `\` are returned
110/// unchanged (and the common case allocates only what it copies).
111pub fn format_markup(input: &str) -> String {
112    if !input.contains(['^', '_', '\\']) {
113        return input.to_string();
114    }
115
116    let mut out = String::with_capacity(input.len());
117    let mut chars = input.chars().peekable();
118
119    while let Some(c) = chars.next() {
120        match c {
121            '\\' => {
122                // Escape: emit the next character literally (or a lone backslash).
123                if let Some(&next) = chars.peek() {
124                    out.push(next);
125                    chars.next();
126                } else {
127                    out.push('\\');
128                }
129            }
130            '^' => apply_markup(&mut out, &mut chars, superscript_char, '^'),
131            '_' => apply_markup(&mut out, &mut chars, subscript_char, '_'),
132            other => out.push(other),
133        }
134    }
135    out
136}
137
/// Handles a `^`/`_` marker whose marker character has already been consumed.
///
/// * `out` – output buffer the translated (or literal) text is appended to.
/// * `chars` – input positioned just after the marker; advanced past whatever
///   this call recognises as markup.
/// * `map` – translation for a single character (`None` when it has no
///   raised/lowered form).
/// * `marker` – the marker character itself, emitted literally when what
///   follows is not valid markup.
///
/// Three shapes are recognised:
///
/// * `{...}` – every character up to the closing `}` is translated with
///   `map`; characters without a translation are kept as-is. If the group is
///   never closed, nothing is consumed and `marker` is emitted literally, so
///   a missing `}` cannot translate the whole remainder of the input.
/// * a single translatable character – consumed and translated.
/// * anything else (untranslatable character or end of input) – `marker` is
///   emitted literally and nothing is consumed.
fn apply_markup(
    out: &mut String,
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
    map: fn(char) -> Option<char>,
    marker: char,
) {
    match chars.peek().copied() {
        Some('{') => {
            // Scan the group on a clone so nothing is consumed from `chars`
            // unless the closing brace is actually found.
            let mut lookahead = chars.clone();
            lookahead.next(); // skip '{'

            let group_start = out.len();
            let mut closed = false;
            for c in lookahead.by_ref() {
                if c == '}' {
                    closed = true;
                    break;
                }
                // Untranslatable characters are kept as-is inside the group.
                out.push(map(c).unwrap_or(c));
            }

            if closed {
                // Commit: resume the caller right after the '}'.
                *chars = lookahead;
            } else {
                // Unterminated group: discard the speculative output and
                // keep the marker literal; the caller emits the rest.
                out.truncate(group_start);
                out.push(marker);
            }
        }
        Some(next) => match map(next) {
            Some(mapped) => {
                out.push(mapped);
                chars.next();
            }
            // Not markup (e.g. a stray "^" before a space): keep literal.
            None => out.push(marker),
        },
        None => out.push(marker),
    }
}
168
/// Unit tests for [`format_markup`], covering plain text, single-character
/// and braced markup, escapes, and malformed-markup fallbacks.
#[cfg(test)]
mod tests {
    use super::format_markup;

    #[test]
    fn plain_text_unchanged() {
        assert_eq!(format_markup(""), "");
        assert_eq!(format_markup("hello world"), "hello world");
        assert_eq!(format_markup("temperature (C)"), "temperature (C)");
    }

    #[test]
    fn single_char_superscript() {
        assert_eq!(format_markup("x^2"), "x²");
        assert_eq!(format_markup("e^x"), "eˣ");
    }

    #[test]
    fn single_char_subscript() {
        assert_eq!(format_markup("H_2O"), "H₂O");
        assert_eq!(format_markup("x_i"), "xᵢ");
    }

    #[test]
    fn braced_groups() {
        assert_eq!(format_markup("10^{-3}"), "10⁻³");
        assert_eq!(format_markup("a_{ij}"), "aᵢⱼ");
        assert_eq!(format_markup("CO_{2}"), "CO₂");
    }

    #[test]
    fn empty_group_emits_nothing() {
        assert_eq!(format_markup("x^{}"), "x");
    }

    #[test]
    fn escapes_are_literal() {
        assert_eq!(format_markup("5 \\^ 2"), "5 ^ 2");
        assert_eq!(format_markup("a\\_b"), "a_b");
        assert_eq!(format_markup("c:\\\\path"), "c:\\path");
    }

    #[test]
    fn trailing_backslash_kept() {
        assert_eq!(format_markup("a\\"), "a\\");
    }

    #[test]
    fn stray_marker_before_space_kept() {
        assert_eq!(format_markup("2 ^ 3 is power"), "2 ^ 3 is power");
    }

    #[test]
    fn marker_before_untranslatable_char_kept() {
        // 'Q' has no superscript form and 'b' has no subscript form, so the
        // marker is not treated as markup.
        assert_eq!(format_markup("x^Q"), "x^Q");
        assert_eq!(format_markup("a_b"), "a_b");
    }

    #[test]
    fn untranslatable_in_group_kept() {
        // 'Q' has no superscript form; it is preserved inside the group.
        assert_eq!(format_markup("x^{2Q}"), "x²Q");
    }

    #[test]
    fn trailing_marker() {
        assert_eq!(format_markup("value^"), "value^");
        assert_eq!(format_markup("value_"), "value_");
    }
}