1use core::fmt;
4use core::iter::{FusedIterator, Peekable};
5
6#[cfg(test)]
7mod tests;
8
/// A Unicode code point stored as a raw `u32`.
///
/// Unlike `char`, a `CodePoint` may hold a surrogate (`0xD800..=0xDFFF`): the
/// checked constructor accepts every value in `0..=0x10FFFF`.
///
/// Equality and ordering are those of the underlying numeric value.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct CodePoint {
    // Invariant relied on when converting to `char`: never above 0x10FFFF.
    value: u32,
}
18
19impl fmt::Debug for CodePoint {
22 #[inline]
23 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
24 write!(formatter, "U+{:04X}", self.value)
25 }
26}
27
28impl CodePoint {
29 #[inline]
35 pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint {
36 CodePoint { value }
37 }
38
39 #[inline]
43 pub fn from_u32(value: u32) -> Option<CodePoint> {
44 match value {
45 0..=0x10FFFF => Some(CodePoint { value }),
46 _ => None,
47 }
48 }
49
50 #[inline]
54 pub fn from_char(value: char) -> CodePoint {
55 CodePoint {
56 value: value as u32,
57 }
58 }
59
60 #[inline]
62 pub fn to_u32(&self) -> u32 {
63 self.value
64 }
65
66 #[inline]
70 pub fn to_char(&self) -> Option<char> {
71 match self.value {
72 0xD800..=0xDFFF => None,
73 _ => Some(unsafe { char::from_u32_unchecked(self.value) }),
77 }
78 }
79
80 #[inline]
85 pub fn to_char_lossy(&self) -> char {
86 self.to_char().unwrap_or('\u{FFFD}')
87 }
88
89 #[inline]
91 pub fn decode_utf16<I>(input: I) -> DecodeUtf16<I>
92 where
93 I: Iterator<Item = u16>,
94 {
95 DecodeUtf16 {
96 input: input.peekable(),
97 }
98 }
99
100 #[inline]
102 pub fn encode_utf16<I>(input: I) -> EncodeUtf16<I>
103 where
104 I: Iterator<Item = CodePoint>,
105 {
106 EncodeUtf16 { input, buf: None }
107 }
108}
109
110impl From<char> for CodePoint {
111 #[inline]
112 fn from(c: char) -> Self {
113 Self::from_char(c)
114 }
115}
116
/// Iterator adapter that turns UTF-16 code units (`u16`) into [`CodePoint`]s.
///
/// Created by `CodePoint::decode_utf16`.
pub struct DecodeUtf16<I: Iterator<Item = u16>> {
    // Peekable so the unit after a high surrogate can be inspected without
    // consuming it when it does not complete a pair.
    input: Peekable<I>,
}
124impl<I> Iterator for DecodeUtf16<I>
125where
126 I: Iterator<Item = u16>,
127{
128 type Item = CodePoint;
129
130 #[inline]
131 fn next(&mut self) -> Option<CodePoint> {
132 let mut val = self.input.next()? as u32;
133
134 if let 0xD800..=0xDBFF = val {
135 if let Some(y @ 0xDC00..=0xDFFF) = self.input.peek().copied() {
136 val = 0x1_0000 | ((val - 0xD800) << 10) | (y as u32 - 0xDC00);
137 self.input.next();
138 }
139 }
140
141 Some(unsafe { CodePoint::from_u32_unchecked(val) })
143 }
144
145 #[inline]
146 fn size_hint(&self) -> (usize, Option<usize>) {
147 let (l, h) = self.input.size_hint();
148 (l / 2, h)
149 }
150}
151impl<I> FusedIterator for DecodeUtf16<I> where I: FusedIterator<Item = u16> {}
152
153pub struct EncodeUtf16<I>
155where
156 I: Iterator<Item = CodePoint>,
157{
158 input: I,
159 buf: Option<u16>,
160}
161impl<I> Iterator for EncodeUtf16<I>
162where
163 I: Iterator<Item = CodePoint>,
164{
165 type Item = u16;
166
167 #[inline]
168 fn next(&mut self) -> Option<u16> {
169 if let Some(x) = self.buf.take() {
170 return Some(x);
171 }
172
173 let p = self.input.next()?.to_u32();
174 if p >= 0x1_0000 {
175 self.buf = Some(((p - 0x1_0000) & 0x3FF) as u16 | 0xDC00);
176 Some(((p - 0x1_0000) >> 10) as u16 | 0xD800)
177 } else {
178 Some(p as u16)
179 }
180 }
181
182 #[inline]
183 fn size_hint(&self) -> (usize, Option<usize>) {
184 let (l, h) = self.input.size_hint();
185 (
186 l.saturating_add(self.buf.is_some() as usize),
187 h.and_then(|x| x.checked_mul(2))
188 .and_then(|x| x.checked_add(self.buf.is_some() as usize)),
189 )
190 }
191}
192impl<I> FusedIterator for EncodeUtf16<I> where I: FusedIterator<Item = CodePoint> {}