// asciidoc_parser/strings.rs

// Adapted from pulldown-cmark, which comes with the following license:
//
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! String types that facilitate parsing.

use std::{
    borrow::{Borrow, Cow},
    fmt,
    hash::{Hash, Hasher},
    ops::Deref,
    str::from_utf8,
};

/// Maximum number of UTF-8 bytes that an [`InlineStr`] can hold.
///
/// The value is three machine words minus two bytes (22 on a 64-bit target).
// NOTE(review): the two spare bytes presumably leave room for `InlineStr`'s
// length byte and the `CowStr` discriminant so `CowStr` stays three words
// long — confirm before changing this value.
pub(crate) const MAX_INLINE_STR_LEN: usize = 3 * std::mem::size_of::<isize>() - 2;

/// Returned when trying to convert a `&str` into an [`InlineStr`] but it fails
/// because it doesn't fit.
///
/// Implements [`std::error::Error`] so it can be boxed as a `dyn Error` and
/// propagated with `?` from functions that return a generic error type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StringTooLongError;

impl fmt::Display for StringTooLongError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("string is too long to be stored inline")
    }
}

impl std::error::Error for StringTooLongError {}

40/// An inline string that can contain almost three words
41/// of UTF-8 text.
42#[derive(Debug, Clone, Copy, Eq)]
43pub struct InlineStr {
44    inner: [u8; MAX_INLINE_STR_LEN],
45    len: u8,
46}
47
48impl AsRef<str> for InlineStr {
49    fn as_ref(&self) -> &str {
50        self.deref()
51    }
52}
53
54impl Hash for InlineStr {
55    fn hash<H: Hasher>(&self, state: &mut H) {
56        self.deref().hash(state);
57    }
58}
59
60impl From<char> for InlineStr {
61    fn from(c: char) -> Self {
62        let mut inner = [0u8; MAX_INLINE_STR_LEN];
63        c.encode_utf8(&mut inner);
64        let len = c.len_utf8() as u8;
65        Self { inner, len }
66    }
67}
68
69impl std::cmp::PartialEq<InlineStr> for InlineStr {
70    fn eq(&self, other: &InlineStr) -> bool {
71        self.deref() == other.deref()
72    }
73}
74
75impl TryFrom<&str> for InlineStr {
76    type Error = StringTooLongError;
77
78    fn try_from(s: &str) -> Result<InlineStr, StringTooLongError> {
79        let len = s.len();
80        if len <= MAX_INLINE_STR_LEN {
81            let mut inner = [0u8; MAX_INLINE_STR_LEN];
82            inner[..len].copy_from_slice(s.as_bytes());
83            let len = len as u8;
84            Ok(Self { inner, len })
85        } else {
86            Err(StringTooLongError)
87        }
88    }
89}
90
91impl Deref for InlineStr {
92    type Target = str;
93
94    fn deref(&self) -> &str {
95        let len = self.len as usize;
96        from_utf8(&self.inner[..len]).unwrap_or_default()
97    }
98}
99
100impl fmt::Display for InlineStr {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        write!(f, "{}", self.as_ref())
103    }
104}
105
106/// A copy-on-write string that can be owned, borrowed,
107/// or inlined.
108///
109/// It is three words long.
110///
111/// NOTE: The [`Debug`] implementation for this struct elides the storage
112/// mechanism that is chosen when pretty printing (as occurs when using the
113/// `dbg!()` macro. To obtain that information, use the “normal” debug
114/// formatting as shown below:
115///
116/// ```
117/// # use asciidoc_parser::strings::CowStr;
118///
119/// let s: &'static str = "0123456789abcdefghijklm";
120/// let s: CowStr = s.into();
121/// assert_eq!(
122///     format!("{s:?}"),
123///     "CowStr::Borrowed(\"0123456789abcdefghijklm\")"
124/// );
125/// ```
126#[derive(Eq)]
127pub enum CowStr<'a> {
128    /// An owned, immutable string.
129    Boxed(Box<str>),
130    /// A borrowed string.
131    Borrowed(&'a str),
132    /// A short inline string.
133    Inlined(InlineStr),
134}
135
136impl AsRef<str> for CowStr<'_> {
137    fn as_ref(&self) -> &str {
138        self.deref()
139    }
140}
141
142impl Hash for CowStr<'_> {
143    fn hash<H: Hasher>(&self, state: &mut H) {
144        self.deref().hash(state);
145    }
146}
147
148impl std::clone::Clone for CowStr<'_> {
149    fn clone(&self) -> Self {
150        match self {
151            CowStr::Boxed(s) => match InlineStr::try_from(&**s) {
152                Ok(inline) => CowStr::Inlined(inline),
153                Err(..) => CowStr::Boxed(s.clone()),
154            },
155            CowStr::Borrowed(s) => CowStr::Borrowed(s),
156            CowStr::Inlined(s) => CowStr::Inlined(*s),
157        }
158    }
159}
160
161impl<'a> std::cmp::PartialEq<CowStr<'a>> for CowStr<'a> {
162    fn eq(&self, other: &CowStr<'_>) -> bool {
163        self.deref() == other.deref()
164    }
165}
166
167impl<'a> From<&'a str> for CowStr<'a> {
168    fn from(s: &'a str) -> Self {
169        CowStr::Borrowed(s)
170    }
171}
172
173impl From<String> for CowStr<'_> {
174    fn from(s: String) -> Self {
175        CowStr::Boxed(s.into_boxed_str())
176    }
177}
178
179impl From<char> for CowStr<'_> {
180    fn from(c: char) -> Self {
181        CowStr::Inlined(c.into())
182    }
183}
184
185impl<'a> From<Cow<'a, str>> for CowStr<'a> {
186    fn from(s: Cow<'a, str>) -> Self {
187        match s {
188            Cow::Borrowed(s) => CowStr::Borrowed(s),
189            Cow::Owned(s) => CowStr::Boxed(s.into_boxed_str()),
190        }
191    }
192}
193
194impl<'a> From<CowStr<'a>> for Cow<'a, str> {
195    fn from(s: CowStr<'a>) -> Self {
196        match s {
197            CowStr::Boxed(s) => Cow::Owned(s.to_string()),
198            CowStr::Inlined(s) => Cow::Owned(s.to_string()),
199            CowStr::Borrowed(s) => Cow::Borrowed(s),
200        }
201    }
202}
203
204impl<'a> From<Cow<'a, char>> for CowStr<'a> {
205    fn from(s: Cow<'a, char>) -> Self {
206        CowStr::Inlined(InlineStr::from(*s))
207    }
208}
209
210impl Deref for CowStr<'_> {
211    type Target = str;
212
213    fn deref(&self) -> &str {
214        match self {
215            CowStr::Boxed(b) => b,
216            CowStr::Borrowed(b) => b,
217            CowStr::Inlined(s) => s.deref(),
218        }
219    }
220}
221
222impl Borrow<str> for CowStr<'_> {
223    fn borrow(&self) -> &str {
224        self.deref()
225    }
226}
227
228impl CowStr<'_> {
229    /// Convert the `CowStr` into an owned `String`.
230    pub fn into_string(self) -> String {
231        match self {
232            CowStr::Boxed(b) => b.into(),
233            CowStr::Borrowed(b) => b.to_owned(),
234            CowStr::Inlined(s) => s.deref().to_owned(),
235        }
236    }
237}
238
239impl fmt::Display for CowStr<'_> {
240    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
241        write!(f, "{}", self.as_ref())
242    }
243}
244
245impl fmt::Debug for CowStr<'_> {
246    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
247        if f.alternate() {
248            write!(f, "{:?}", self.as_ref())
249        } else {
250            match self {
251                Self::Boxed(b) => f.debug_tuple("CowStr::Boxed").field(b).finish(),
252                Self::Borrowed(b) => f.debug_tuple("CowStr::Borrowed").field(b).finish(),
253                Self::Inlined(s) => f.debug_tuple("CowStr::Inlined").field(s).finish(),
254            }
255        }
256    }
257}