// obeli_sk_boa_string/code_point.rs

use std::fmt::Write;
2
/// Represents a Unicode codepoint within a [`crate::JsString`], which could be a valid
/// '[Unicode scalar value]', or an unpaired surrogate.
///
/// The type is a small `Copy` value. `Hash` is derived together with `Eq`, so code
/// points can be stored in hashed collections.
///
/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum CodePoint {
    /// A valid Unicode scalar value.
    Unicode(char),

    /// An unpaired surrogate, stored as its raw UTF-16 code unit.
    UnpairedSurrogate(u16),
}
15
16impl CodePoint {
17    /// Get the number of UTF-16 code units needed to encode this code point.
18    #[inline]
19    #[must_use]
20    pub const fn code_unit_count(self) -> usize {
21        match self {
22            Self::Unicode(c) => c.len_utf16(),
23            Self::UnpairedSurrogate(_) => 1,
24        }
25    }
26
27    /// Convert the code point to its [`u32`] representation.
28    #[inline]
29    #[must_use]
30    pub fn as_u32(self) -> u32 {
31        match self {
32            Self::Unicode(c) => u32::from(c),
33            Self::UnpairedSurrogate(surr) => u32::from(surr),
34        }
35    }
36
37    /// If the code point represents a valid 'Unicode scalar value', returns its [`char`]
38    /// representation, otherwise returns [`None`] on unpaired surrogates.
39    #[inline]
40    #[must_use]
41    pub const fn as_char(self) -> Option<char> {
42        match self {
43            Self::Unicode(c) => Some(c),
44            Self::UnpairedSurrogate(_) => None,
45        }
46    }
47
48    /// Encodes this code point as UTF-16 into the provided u16 buffer, and then returns the subslice
49    /// of the buffer that contains the encoded character.
50    ///
51    /// # Panics
52    ///
53    /// Panics if the buffer is not large enough. A buffer of length 2 is large enough to encode any
54    /// code point.
55    #[inline]
56    #[must_use]
57    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
58        match self {
59            Self::Unicode(c) => c.encode_utf16(dst),
60            Self::UnpairedSurrogate(surr) => {
61                dst[0] = surr;
62                &mut dst[0..=0]
63            }
64        }
65    }
66}
67
68impl std::fmt::Display for CodePoint {
69    #[inline]
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        match self {
72            CodePoint::Unicode(c) => f.write_char(*c),
73            CodePoint::UnpairedSurrogate(c) => {
74                write!(f, "\\u{c:04X}")
75            }
76        }
77    }
78}
79
80impl From<char> for CodePoint {
81    fn from(value: char) -> Self {
82        Self::Unicode(value)
83    }
84}
85
86impl From<u16> for CodePoint {
87    fn from(value: u16) -> Self {
88        char::from_u32(u32::from(value))
89            .map_or_else(|| CodePoint::UnpairedSurrogate(value), CodePoint::Unicode)
90    }
91}