lowlevel_types/ascii/
char.rs

//! Utilities for the `ascii::Char` character type.
//!
//! The `Char` type represents a single ASCII character.
4//!
5use std::fmt::Display;
6
7use serde::{Deserialize, Deserializer, Serialize, de::Visitor};
8
/// The ascii::Char type represents a single, 8 bit ASCII character.
///
/// The wrapped `u8` is public and unrestricted, so every byte value
/// `0x00..=0xFF` can be constructed, not only the 7-bit range `0x00..=0x7F`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct Char(pub u8);
12
13impl Char {
14	/// Returns the NULL character (0x00)
15	pub fn null() -> Self {
16		Char(0x00)
17	}
18
19	/// Returns the character encoded as UTF-8 bytes
20	pub fn as_utf8(&self) -> [u8; 4] {
21		if self.0 <= 0x7F {
22			[self.0, 0x00, 0x00, 0x00]
23		} else {
24			[self.0 >> 6 & 0x1F | 0xC0, self.0 & 0x3F | 0x80, 0x00, 0x00]
25		}
26	}
27
28	/// Returns the length of the character encoded as UTF-8 bytes
29	pub fn len_utf8(&self) -> usize {
30		match self.0 {
31			0x00..0x80 => 1,
32			_ => 2,
33		}
34	}
35
36	/// Returns the ascii::Char as a standard Rust primitive char
37	pub fn char(&self) -> char {
38		char::from(self.0)
39	}
40
41	/// Compares two ascii::Char values, ignoring letter casing
42	pub fn eq_ignore_case(&self, other: &Char) -> bool {
43		self.lowercase() == other.lowercase()
44	}
45
46	/// Returns the uppercase version of the ascii::Char
47	pub fn uppercase(&self) -> Char {
48		if self.is_lowercase() {
49			Char(self.0 - 0x20)
50		} else {
51			Char(self.0)
52		}
53	}
54
55	/// Returns the lowercase version of the ascii::Char
56	pub fn lowercase(&self) -> Char {
57		if self.is_uppercase() {
58			Char(self.0 + 0x20)
59		} else {
60			Char(self.0)
61		}
62	}
63
64	/// Returns true if the ascii::Char is a letter (A-Z, a-z)
65	pub fn is_alphabetic(&self) -> bool {
66		self.is_uppercase() || self.is_lowercase()
67	}
68
69	/// Returns true if the ascii::Char is an uppercase letter (A-Z)
70	pub fn is_uppercase(&self) -> bool {
71		match self.0 {
72			0x41..=0x5A => true,
73			_ => false,
74		}
75	}
76
77	/// Returns true if the ascii::Char is a lowercase letter (a-z)
78	pub fn is_lowercase(&self) -> bool {
79		match self.0 {
80			0x61..=0x7A => true,
81			_ => false,
82		}
83	}
84
85	/// Returns true if the ascii::Char is a decimal digit (0-9)
86	pub fn is_numeric(&self) -> bool {
87		match self.0 {
88			0x30..=0x39 => true,
89			_ => false,
90		}
91	}
92
93	/// Returns true if the asscii::Char is punctuation
94	pub fn is_punctuation(&self) -> bool {
95		match self.0 {
96			0x21..=0x29 | 0x3A..=0x40 | 0x5B..=0x60 | 0x7B..=0x7E => true,
97			_ => false,
98		}
99	}
100
101	/// Returns true if the ascii::Char is a control code (0x00-0x1F)
102	pub fn is_control(&self) -> bool {
103		match self.0 {
104			0x00..=0x1F => true,
105			_ => false,
106		}
107	}
108
109	/// Returns true if the ascii::Char is a whitespace character (e.g. Space, Tab)
110	pub fn is_whitespace(&self) -> bool {
111		match self.0 {
112			0x09 | 0x0A | 0x0C | 0x0D | 0x20 => true,
113			_ => false,
114		}
115	}
116
117	/// Returns true if the ascii::Char is the NULL character (0x00)
118	pub fn is_null(&self) -> bool {
119		self.0 == 0x00
120	}
121}
122
123impl Display for Char {
124	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125		if self.is_control() {
126			write!(f, "\0x{:X}", self.0)
127		} else {
128			write!(f, "{}", char::from(self.0))
129		}
130	}
131}
132
133impl From<Char> for u8 {
134	fn from(value: Char) -> Self {
135		value.0.clone()
136	}
137}
138
#[cfg(feature = "serde")]
impl Serialize for Char {
	/// Serializes the character as its wrapped `u8` value.
	fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
		let Char(byte) = *self;
		serializer.serialize_u8(byte)
	}
}
148
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Char {
	/// Deserializes a `Char` by asking the deserializer for a `u8` and
	/// handing the value to `ACharVisitor`.
	fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Char, D::Error> {
		let visitor = ACharVisitor;
		deserializer.deserialize_u8(visitor)
	}
}
158
/// Serde visitor used by the `Deserialize` implementation of `Char`.
#[cfg(feature = "serde")]
pub struct ACharVisitor;
161
#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for ACharVisitor {
	type Value = Char;

	/// Describes the expected input for serde error messages.
	fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
		formatter.write_str("a single ASCII character")
	}

	/// Wraps a `u8` delivered by the deserializer.
	fn visit_u8<E>(self, v: u8) -> Result<Self::Value, E>
	where
		E: serde::de::Error,
	{
		Ok(Char(v))
	}

	/// Accepts an unsigned integer that fits in one byte.
	///
	/// `deserialize_u8` is only a hint: a deserializer may deliver the value
	/// through a wider `visit_*` method, and serde's default `visit_u16` and
	/// `visit_u32` forward here. Without this override those inputs would be
	/// rejected by the default `visit_u64`, which reports a type error.
	fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
	where
		E: serde::de::Error,
	{
		if v <= u64::from(u8::MAX) {
			// Range checked above, so the cast cannot truncate
			Ok(Char(v as u8))
		} else {
			Err(E::invalid_value(serde::de::Unexpected::Unsigned(v), &self))
		}
	}

	/// Accepts a signed integer in `0..=255`.
	///
	/// serde's default `visit_i8`, `visit_i16` and `visit_i32` forward here.
	fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
	where
		E: serde::de::Error,
	{
		if (0..=i64::from(u8::MAX)).contains(&v) {
			// Range checked above, so the cast cannot truncate
			Ok(Char(v as u8))
		} else {
			Err(E::invalid_value(serde::de::Unexpected::Signed(v), &self))
		}
	}
}
177
#[cfg(test)]
mod tests {
	use crate::ascii;

	/// Checks `as_utf8` and `len_utf8` for every byte value.
	#[test]
	fn test_utf8() {
		// Test a specific, known conversion: 0xA0 (no-break space) is C2 A0 in UTF-8
		let nbsp = ascii::Char(0xA0);
		assert_eq!(nbsp.as_utf8(), [0xC2, 0xA0, 0x00, 0x00]);
		assert_eq!(nbsp.len_utf8(), 2);

		// Compare every byte value against the standard library's encoder, so the
		// expected bytes do not come from the same formula as the implementation
		for c in u8::MIN..=u8::MAX {
			let ch = ascii::Char(c);
			let mut expected = [0x00u8; 4];
			let expected_len = char::from(c).encode_utf8(&mut expected).len();
			assert_eq!(ch.as_utf8(), expected, "as_utf8 mismatch for {:#04X}", c);
			assert_eq!(ch.len_utf8(), expected_len, "len_utf8 mismatch for {:#04X}", c);
		}
	}

	/// Checks that `null()` is the 0x00 character.
	#[test]
	fn test_null() {
		assert_eq!(ascii::Char::null(), ascii::Char(0x00));
	}

	/// Checks the `Display` output for a control code and a printable letter.
	#[test]
	fn test_display() {
		// The expected string starts with the NUL escape `\0`, followed by "x0"
		let cha = ascii::Char(0x00);
		assert_eq!(format!("{}", cha), "\0x0");
		let chb = ascii::Char(0x41);
		assert_eq!(format!("{}", chb), "A");
	}

	/// Checks the classification and case-conversion methods for every byte value.
	#[test]
	fn test_comparisons() {
		for c in u8::MIN..=u8::MAX {
			let cha = ascii::Char(c);
			let chb = ascii::Char(c);

			assert_eq!(cha, chb);
			match c {
				// Uppercase letters
				0x41..=0x5A => {
					assert!(cha.is_alphabetic());
					assert!(cha.is_uppercase());
					assert!(!cha.is_lowercase());
					assert!(!cha.is_numeric());
					assert!(!cha.is_punctuation());
					assert!(!cha.is_whitespace());
					assert!(!cha.is_control());
					let chc = cha.lowercase();
					assert!(cha.eq_ignore_case(&chc));
				}
				// Lowercase letters
				0x61..=0x7A => {
					assert!(cha.is_alphabetic());
					assert!(cha.is_lowercase());
					assert!(!cha.is_uppercase());
					let chc = cha.uppercase();
					assert!(cha.eq_ignore_case(&chc));
				}
				// Decimal digits
				0x30..=0x39 => {
					assert!(cha.is_numeric());
				}
				// Punctuation; case conversion must leave these untouched
				0x21..=0x29 | 0x3A..=0x40 | 0x5B..=0x60 | 0x7B..=0x7E => {
					assert!(cha.is_punctuation());
					assert_eq!(cha, cha.uppercase());
					assert_eq!(cha, cha.lowercase());
				}
				// Control codes
				0x00..=0x1F => {
					assert!(cha.is_control());
				}
				_ => {}
			}
			// This has to be separated because Control overlaps with Whitespace and NULL
			match c {
				0x00 => {
					assert!(cha.is_null());
				}
				0x09 | 0x0A | 0x0C | 0x0D | 0x20 => {
					assert!(cha.is_whitespace());
				}
				_ => {}
			}
		}
	}
}