basic_text_internals/
check.rs

1//! On output, several disallowed scalar values are rejected, to catch
2//! applications attempting to use them.
3
4use crate::replace;
5use crate::unicode::{BOM, ESC, ORC};
6use std::collections::VecDeque;
7use thiserror::Error;
8
9/// Test whether the given Unicode scalar value is valid in a Basic Text
10/// string.
11#[inline]
12pub fn check_basic_text_char(c: char) -> Result<(), BasicTextError> {
13    match c {
14        // Newline and tab are allowed, and escape is handled specially.
15        c if c.is_control() && c != '\n' && c != '\t' && c != ESC => control(c),
16        '\u{149}'
17        | '\u{673}'
18        | '\u{f77}'
19        | '\u{f79}'
20        | '\u{17a3}'
21        | '\u{17a4}'
22        | '\u{2329}'
23        | '\u{232a}'
24        | '\u{2126}'
25        | '\u{212a}'
26        | '\u{212b}'
27        | '\u{2df5}'
28        | '\u{111c4}'
29        | '\u{fb00}'..='\u{fb06}'
30        | '\u{9e4}'
31        | '\u{9e5}'
32        | '\u{a64}'
33        | '\u{a65}'
34        | '\u{ae4}'
35        | '\u{ae5}'
36        | '\u{b64}'
37        | '\u{b65}'
38        | '\u{be4}'
39        | '\u{be5}'
40        | '\u{c64}'
41        | '\u{c65}'
42        | '\u{ce4}'
43        | '\u{ce5}'
44        | '\u{d64}'
45        | '\u{d65}'
46        | '\u{2072}'
47        | '\u{2073}'
48        | '\u{1d455}'
49        | '\u{1d49d}'
50        | '\u{1d4a0}'
51        | '\u{1d4a1}'
52        | '\u{1d4a3}'
53        | '\u{1d4a4}'
54        | '\u{1d4a7}'
55        | '\u{1d4a8}'
56        | '\u{1d4ad}'
57        | '\u{1d4ba}'
58        | '\u{1d4bc}'
59        | '\u{1d4c4}'
60        | '\u{1d506}'
61        | '\u{1d50b}'
62        | '\u{1d50c}'
63        | '\u{1d515}'
64        | '\u{1d51d}'
65        | '\u{1d53a}'
66        | '\u{1d53f}'
67        | '\u{1d545}'
68        | '\u{1d547}'
69        | '\u{1d548}'
70        | '\u{1d549}'
71        | '\u{1d551}'
72        | '\u{f900}'..='\u{fa0d}'
73        | '\u{fa10}'
74        | '\u{fa12}'
75        | '\u{fa15}'..='\u{fa1e}'
76        | '\u{fa20}'
77        | '\u{fa22}'
78        | '\u{fa25}'..='\u{fa26}'
79        | '\u{fa2a}'..='\u{fa6d}'
80        | '\u{fa70}'..='\u{fad9}'
81        | '\u{2f800}'..='\u{2fa1d}' => replacement(c),
82        '\u{e0001}' => language_tag(),
83        '\u{fff9}'..='\u{fffb}' => interlinear_annotation(),
84        '\u{17b4}' | '\u{17b5}' => omit(c),
85        '\u{17d8}' => beyyal(),
86        '\u{206a}'..='\u{206f}' => deprecated_format_character(c),
87        '\u{2028}' => line_separation(),
88        '\u{2029}' => para_separation(),
89        '\u{202a}' | '\u{202b}' | '\u{202c}' | '\u{202d}' | '\u{202e}' | '\u{2066}'
90        | '\u{2067}' | '\u{2068}' | '\u{2069}' => bidirectional_formatting_character(),
91        '\u{fffe}'..='\u{ffff}'
92        | '\u{1fffe}'..='\u{1ffff}'
93        | '\u{2fffe}'..='\u{2ffff}'
94        | '\u{3fffe}'..='\u{3ffff}'
95        | '\u{4fffe}'..='\u{4ffff}'
96        | '\u{5fffe}'..='\u{5ffff}'
97        | '\u{6fffe}'..='\u{6ffff}'
98        | '\u{7fffe}'..='\u{7ffff}'
99        | '\u{8fffe}'..='\u{8ffff}'
100        | '\u{9fffe}'..='\u{9ffff}'
101        | '\u{afffe}'..='\u{affff}'
102        | '\u{bfffe}'..='\u{bffff}'
103        | '\u{cfffe}'..='\u{cffff}'
104        | '\u{dfffe}'..='\u{dffff}'
105        | '\u{efffe}'..='\u{effff}'
106        | '\u{ffffe}'..='\u{fffff}'
107        | '\u{10fffe}'..='\u{10ffff}'
108        | '\u{fdd0}'..='\u{fdef}' => noncharacter(),
109        ORC => orc(),
110        BOM => bom(),
111        _ => Ok(()),
112    }
113}
114
115/// An invalid Unicode scalar value sequence.
116#[derive(Error, Debug)]
117pub enum BasicTextError {
118    #[error("Color escape sequences are not enabled")]
119    ColorEscapeSequence,
120    #[error("Control code not valid in text: {0:?}")]
121    ControlCode(char),
122    #[error("Deprecated Format Characters are deprecated: {0:?}")]
123    DeprecatedFormatChar(char),
124    #[error("Escape code not valid in text")]
125    Escape,
126    #[error("Explicit Bidirectional Formatting Characters are unsupported")]
127    BidiFormatChar,
128    #[error("Interlinear Annotations depend on out-of-band information")]
129    Interlinear,
130    #[error("Language tagging is a deprecated mechanism")]
131    LanguageTag,
132    #[error("Line separation is a rich-text function")]
133    LineSeparation,
134    #[error("Noncharacters are intended for internal use only")]
135    NonChar,
136    #[error("Paragraph separation is a rich-text function")]
137    ParaSeparation,
138    #[error("U+FEFF is not necessary in Basic Text")]
139    UnneededBOM,
140    #[error("U+FFFC depends on out-of-band information")]
141    OutOfBand,
142    #[error("Omit {0:?}")]
143    Omit(char),
144    #[error("Spell beyyal with normal letters")]
145    Beyyal,
146    #[error("Unrecognized escape sequence")]
147    UnrecognizedEscape,
148    #[error("Use {yes:?} instead of {no:?}")]
149    Replacement { yes: Box<[char]>, no: char },
150}
151
152#[cold]
153fn control(c: char) -> Result<(), BasicTextError> {
154    Err(BasicTextError::ControlCode(c))
155}
156
157#[cold]
158fn replacement(c: char) -> Result<(), BasicTextError> {
159    let mut queue = VecDeque::new();
160    replace(c, &mut queue);
161    Err(BasicTextError::Replacement {
162        yes: queue.iter().copied().collect::<Vec<_>>().into_boxed_slice(),
163        no: c,
164    })
165}
166
167#[cold]
168fn omit(c: char) -> Result<(), BasicTextError> {
169    Err(BasicTextError::Omit(c))
170}
171
172#[cold]
173fn beyyal() -> Result<(), BasicTextError> {
174    Err(BasicTextError::Beyyal)
175}
176
177#[cold]
178fn deprecated_format_character(c: char) -> Result<(), BasicTextError> {
179    Err(BasicTextError::DeprecatedFormatChar(c))
180}
181
182#[cold]
183fn language_tag() -> Result<(), BasicTextError> {
184    Err(BasicTextError::LanguageTag)
185}
186
187#[cold]
188fn line_separation() -> Result<(), BasicTextError> {
189    Err(BasicTextError::LineSeparation)
190}
191
192#[cold]
193fn para_separation() -> Result<(), BasicTextError> {
194    Err(BasicTextError::ParaSeparation)
195}
196
197#[cold]
198fn bidirectional_formatting_character() -> Result<(), BasicTextError> {
199    Err(BasicTextError::BidiFormatChar)
200}
201
202#[cold]
203fn noncharacter() -> Result<(), BasicTextError> {
204    Err(BasicTextError::NonChar)
205}
206
207#[cold]
208fn orc() -> Result<(), BasicTextError> {
209    Err(BasicTextError::OutOfBand)
210}
211
212#[cold]
213fn bom() -> Result<(), BasicTextError> {
214    Err(BasicTextError::UnneededBOM)
215}
216
217#[cold]
218fn interlinear_annotation() -> Result<(), BasicTextError> {
219    Err(BasicTextError::Interlinear)
220}