// asciidoc_parser/strings.rs

// Adapted from pulldown-cmark, which comes with the following license:
//
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! String types that facilitate parsing.

use std::{
    borrow::{Borrow, Cow},
    error::Error,
    fmt,
    hash::{Hash, Hasher},
    ops::Deref,
    str::from_utf8,
};

/// Maximum number of UTF-8 bytes an [`InlineStr`] can hold.
///
/// This is three machine words minus two bytes (22 on 64-bit targets).
/// NOTE(review): the two spare bytes are presumably the `len` field of
/// [`InlineStr`] and the [`CowStr`] discriminant, which would keep `CowStr`
/// at exactly three words -- confirm against the `size` test.
pub(crate) const MAX_INLINE_STR_LEN: usize = 3 * std::mem::size_of::<isize>() - 2;

/// Returned when trying to convert a `&str` into an [`InlineStr`] but it fails
/// because it doesn't fit.
///
/// The type carries no data; it implements [`fmt::Display`] and [`Error`] so
/// it can be propagated with `?` into `Box<dyn Error>`-style error types.
#[derive(Debug)]
pub struct StringTooLongError;

impl fmt::Display for StringTooLongError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("string is too long to be stored inline")
    }
}

impl Error for StringTooLongError {}

40/// An inline string that can contain almost three words
41/// of UTF-8 text.
42#[derive(Debug, Clone, Copy, Eq)]
43pub struct InlineStr {
44    inner: [u8; MAX_INLINE_STR_LEN],
45    len: u8,
46}
47
48impl AsRef<str> for InlineStr {
49    fn as_ref(&self) -> &str {
50        self.deref()
51    }
52}
53
54impl Hash for InlineStr {
55    fn hash<H: Hasher>(&self, state: &mut H) {
56        self.deref().hash(state);
57    }
58}
59
60impl From<char> for InlineStr {
61    fn from(c: char) -> Self {
62        let mut inner = [0u8; MAX_INLINE_STR_LEN];
63        c.encode_utf8(&mut inner);
64        let len = c.len_utf8() as u8;
65        Self { inner, len }
66    }
67}
68
69impl std::cmp::PartialEq<InlineStr> for InlineStr {
70    fn eq(&self, other: &InlineStr) -> bool {
71        self.deref() == other.deref()
72    }
73}
74
75impl TryFrom<&str> for InlineStr {
76    type Error = StringTooLongError;
77
78    fn try_from(s: &str) -> Result<InlineStr, StringTooLongError> {
79        let len = s.len();
80        if len <= MAX_INLINE_STR_LEN {
81            let mut inner = [0u8; MAX_INLINE_STR_LEN];
82            inner[..len].copy_from_slice(s.as_bytes());
83            let len = len as u8;
84            Ok(Self { inner, len })
85        } else {
86            Err(StringTooLongError)
87        }
88    }
89}
90
91impl Deref for InlineStr {
92    type Target = str;
93
94    fn deref(&self) -> &str {
95        let len = self.len as usize;
96        from_utf8(&self.inner[..len]).unwrap_or_default()
97    }
98}
99
100impl fmt::Display for InlineStr {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        write!(f, "{}", self.as_ref())
103    }
104}
105
106/// A copy-on-write string that can be owned, borrowed,
107/// or inlined.
108///
109/// It is three words long.
110///
111/// NOTE: The [`Debug`] implementation for this struct elides the storage
112/// mechanism that is chosen when pretty printing (as occurs when using the
113/// `dbg!()` macro. To obtain that information, use the β€œnormal” debug
114/// formatting as shown below:
115///
116/// ```
117/// # use asciidoc_parser::strings::CowStr;
118///
119/// let s: &'static str = "0123456789abcdefghijklm";
120/// let s: CowStr = s.into();
121/// assert_eq!(
122///     format!("{s:?}"),
123///     "CowStr::Borrowed(\"0123456789abcdefghijklm\")"
124/// );
125/// ```
126#[derive(Eq)]
127pub enum CowStr<'a> {
128    /// An owned, immutable string.
129    Boxed(Box<str>),
130    /// A borrowed string.
131    Borrowed(&'a str),
132    /// A short inline string.
133    Inlined(InlineStr),
134}
135
136impl AsRef<str> for CowStr<'_> {
137    fn as_ref(&self) -> &str {
138        self.deref()
139    }
140}
141
142impl Hash for CowStr<'_> {
143    fn hash<H: Hasher>(&self, state: &mut H) {
144        self.deref().hash(state);
145    }
146}
147
148impl std::clone::Clone for CowStr<'_> {
149    fn clone(&self) -> Self {
150        match self {
151            CowStr::Boxed(s) => match InlineStr::try_from(&**s) {
152                Ok(inline) => CowStr::Inlined(inline),
153                Err(..) => CowStr::Boxed(s.clone()),
154            },
155            CowStr::Borrowed(s) => CowStr::Borrowed(s),
156            CowStr::Inlined(s) => CowStr::Inlined(*s),
157        }
158    }
159}
160
161impl<'a> std::cmp::PartialEq<CowStr<'a>> for CowStr<'a> {
162    fn eq(&self, other: &CowStr<'_>) -> bool {
163        self.deref() == other.deref()
164    }
165}
166
167impl<'a> From<&'a str> for CowStr<'a> {
168    fn from(s: &'a str) -> Self {
169        CowStr::Borrowed(s)
170    }
171}
172
173impl From<String> for CowStr<'_> {
174    fn from(s: String) -> Self {
175        CowStr::Boxed(s.into_boxed_str())
176    }
177}
178
179impl From<char> for CowStr<'_> {
180    fn from(c: char) -> Self {
181        CowStr::Inlined(c.into())
182    }
183}
184
185impl<'a> From<Cow<'a, str>> for CowStr<'a> {
186    fn from(s: Cow<'a, str>) -> Self {
187        match s {
188            Cow::Borrowed(s) => CowStr::Borrowed(s),
189            Cow::Owned(s) => CowStr::Boxed(s.into_boxed_str()),
190        }
191    }
192}
193
194impl<'a> From<CowStr<'a>> for Cow<'a, str> {
195    fn from(s: CowStr<'a>) -> Self {
196        match s {
197            CowStr::Boxed(s) => Cow::Owned(s.to_string()),
198            CowStr::Inlined(s) => Cow::Owned(s.to_string()),
199            CowStr::Borrowed(s) => Cow::Borrowed(s),
200        }
201    }
202}
203
204impl<'a> From<Cow<'a, char>> for CowStr<'a> {
205    fn from(s: Cow<'a, char>) -> Self {
206        CowStr::Inlined(InlineStr::from(*s))
207    }
208}
209
210impl Deref for CowStr<'_> {
211    type Target = str;
212
213    fn deref(&self) -> &str {
214        match self {
215            CowStr::Boxed(b) => b,
216            CowStr::Borrowed(b) => b,
217            CowStr::Inlined(s) => s.deref(),
218        }
219    }
220}
221
222impl Borrow<str> for CowStr<'_> {
223    fn borrow(&self) -> &str {
224        self.deref()
225    }
226}
227
228impl CowStr<'_> {
229    /// Convert the `CowStr` into an owned `String`.
230    pub fn into_string(self) -> String {
231        match self {
232            CowStr::Boxed(b) => b.into(),
233            CowStr::Borrowed(b) => b.to_owned(),
234            CowStr::Inlined(s) => s.deref().to_owned(),
235        }
236    }
237}
238
239impl fmt::Display for CowStr<'_> {
240    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
241        write!(f, "{}", self.as_ref())
242    }
243}
244
245impl fmt::Debug for CowStr<'_> {
246    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
247        if f.alternate() {
248            write!(f, "{:?}", self.as_ref())
249        } else {
250            match self {
251                Self::Boxed(b) => f.debug_tuple("CowStr::Boxed").field(b).finish(),
252                Self::Borrowed(b) => f.debug_tuple("CowStr::Borrowed").field(b).finish(),
253                Self::Inlined(s) => f.debug_tuple("CowStr::Inlined").field(s).finish(),
254            }
255        }
256    }
257}
258
#[cfg(test)]
mod tests {
    #![allow(clippy::panic)]
    #![allow(clippy::unwrap_used)]

    mod string_too_long_err {
        use crate::strings::StringTooLongError;

        #[test]
        fn impl_debug() {
            let e = StringTooLongError;
            assert_eq!(format!("{e:#?}"), "StringTooLongError");
        }
    }

    mod inline_str {
        use std::ops::Deref;

        use crate::strings::*;

        #[test]
        fn from_ascii() {
            let s: InlineStr = 'a'.into();
            assert_eq!("a", s.deref());
        }

        #[test]
        fn from_unicode() {
            let s: InlineStr = '🍔'.into();
            assert_eq!("🍔", s.deref());
        }

        #[test]
        fn impl_debug() {
            let s: InlineStr = 'a'.into();

            // Built from `MAX_INLINE_STR_LEN` so the expectation holds for any
            // pointer width, not only for the 22-byte buffer of 64-bit targets.
            let unused_bytes = "        0,\n".repeat(MAX_INLINE_STR_LEN - 1);
            let expected = format!(
                "InlineStr {{\n    inner: [\n        97,\n{unused_bytes}    ],\n    len: 1,\n}}"
            );

            assert_eq!(format!("{s:#?}"), expected);
        }

        #[test]
        fn impl_hash() {
            use std::{
                collections::hash_map::DefaultHasher,
                hash::{Hash, Hasher},
            };

            let mut hasher = DefaultHasher::new();
            "🍔".hash(&mut hasher);
            let expected = hasher.finish();

            let s: InlineStr = '🍔'.into();
            let mut hasher = DefaultHasher::new();
            s.hash(&mut hasher);
            let actual = hasher.finish();

            let s: InlineStr = 'a'.into();
            let mut hasher = DefaultHasher::new();
            s.hash(&mut hasher);
            let mismatch = hasher.finish();

            assert_eq!(expected, actual);
            assert_ne!(expected, mismatch);
        }

        #[test]
        fn impl_partial_eq() {
            let s1: InlineStr = '🍔'.into();
            let s2: InlineStr = '🍔'.into();
            let s3: InlineStr = 'a'.into();

            assert_eq!(s1, s2);
            assert_ne!(s1, s3);
        }

        #[test]
        #[allow(clippy::assertions_on_constants)]
        fn max_len_atleast_four() {
            // we need 4 bytes to store a char
            assert!(MAX_INLINE_STR_LEN >= 4);
        }

        #[test]
        #[cfg(target_pointer_width = "64")]
        fn fits_twentytwo() {
            let s = "0123456789abcdefghijkl";
            let stack_str = InlineStr::try_from(s).unwrap();
            assert_eq!(stack_str.deref(), s);
        }

        #[test]
        #[cfg(target_pointer_width = "64")]
        fn doesnt_fit_twentythree() {
            let s = "0123456789abcdefghijklm";
            let _stack_str = InlineStr::try_from(s).unwrap_err();
        }
    }

    mod cow_str {
        use std::{
            borrow::{Borrow, Cow},
            ops::Deref,
        };

        use crate::strings::*;

        /// True when `a` and `b` are the same enum variant (payload ignored).
        fn variant_eq<T>(a: &T, b: &T) -> bool {
            std::mem::discriminant(a) == std::mem::discriminant(b)
        }

        #[test]
        fn size() {
            let size = std::mem::size_of::<CowStr>();
            let word_size = std::mem::size_of::<isize>();
            assert_eq!(3 * word_size, size);
        }

        #[test]
        fn char_to_string() {
            let c = '藏';
            let smort: CowStr = c.into();
            let owned: String = smort.to_string();
            let expected = "藏".to_owned();
            assert_eq!(expected, owned);
        }

        #[test]
        #[cfg(target_pointer_width = "64")]
        fn small_boxed_str_clones_to_stack() {
            let s = "0123456789abcde".to_owned();
            let smort: CowStr = s.into();
            let smort_clone = smort.clone();

            assert!(
                matches!(smort_clone, CowStr::Inlined(..)),
                "Expected a Inlined variant!"
            );
        }

        #[test]
        fn cow_to_cow_str() {
            let s = "some text";
            let cow = Cow::Borrowed(s);
            let actual = CowStr::from(cow);
            let expected = CowStr::Borrowed(s);
            assert_eq!(actual, expected);
            assert!(variant_eq(&actual, &expected));

            let s = "some text".to_string();
            let cow: Cow<str> = Cow::Owned(s.clone());
            let actual = CowStr::from(cow);
            let expected = CowStr::Boxed(s.into_boxed_str());
            assert_eq!(actual, expected);
            assert!(variant_eq(&actual, &expected));
        }

        #[test]
        fn cow_str_to_cow() {
            let s = "some text";
            let cow_str = CowStr::Borrowed(s);
            let actual = Cow::from(cow_str);
            let expected = Cow::Borrowed(s);
            assert_eq!(actual, expected);
            assert!(variant_eq(&actual, &expected));

            let s = "s";
            let inline_str: InlineStr = InlineStr::try_from(s).unwrap();
            let cow_str = CowStr::Inlined(inline_str);
            let actual = Cow::from(cow_str);
            let expected: Cow<str> = Cow::Owned(s.to_string());
            assert_eq!(actual, expected);
            assert!(variant_eq(&actual, &expected));

            let s = "s";
            let cow_str = CowStr::Boxed(s.to_string().into_boxed_str());
            let actual = Cow::from(cow_str);
            let expected: Cow<str> = Cow::Owned(s.to_string());
            assert_eq!(actual, expected);
            assert!(variant_eq(&actual, &expected));
        }

        #[test]
        fn cow_char_to_cow_str() {
            let c = 'c';
            let cow: Cow<char> = Cow::Owned(c);
            let actual = CowStr::from(cow);
            let expected = CowStr::Inlined(InlineStr::from(c));
            assert_eq!(actual, expected);
            assert!(variant_eq(&actual, &expected));

            let c = 'c';
            let cow: Cow<char> = Cow::Borrowed(&c);
            let actual = CowStr::from(cow);
            let expected = CowStr::Inlined(InlineStr::from(c));
            assert_eq!(actual, expected);
            assert!(variant_eq(&actual, &expected));
        }

        #[test]
        fn impl_debug_pretty_print_for_inline() {
            let c = '藏';
            let s: CowStr = c.into();

            assert_eq!(format!("{s:#?}"), r#""藏""#);
        }

        #[test]
        fn impl_debug_pretty_print_for_boxed() {
            let s = "blah blah blah".to_string();
            let s: CowStr = s.into();

            assert_eq!(format!("{s:#?}"), r#""blah blah blah""#);
        }

        #[test]
        fn impl_debug_pretty_print_for_borrowed() {
            let s: &'static str = "0123456789abcdefghijklm";
            let s: CowStr = s.into();

            assert_eq!(format!("{s:#?}"), r#""0123456789abcdefghijklm""#);
        }

        #[test]
        fn impl_debug_for_inline() {
            let c = '藏';
            let s: CowStr = c.into();

            // '藏' encodes to the three bytes 232, 151, 143; the rest of the
            // buffer is zero. Sized from `MAX_INLINE_STR_LEN` so the test does
            // not depend on the pointer width.
            let unused_bytes = ", 0".repeat(MAX_INLINE_STR_LEN - 3);
            let expected = format!(
                "CowStr::Inlined(InlineStr {{ inner: [232, 151, 143{unused_bytes}], len: 3 }})"
            );

            assert_eq!(format!("{s:?}"), expected);
        }

        #[test]
        fn impl_debug_for_boxed() {
            let s = "blah blah blah".to_string();
            let s: CowStr = s.into();

            assert_eq!(format!("{s:?}"), "CowStr::Boxed(\"blah blah blah\")");
        }

        #[test]
        fn impl_debug_for_borrowed() {
            let s: &'static str = "0123456789abcdefghijklm";
            let s: CowStr = s.into();

            assert_eq!(
                format!("{s:?}"),
                "CowStr::Borrowed(\"0123456789abcdefghijklm\")"
            );
        }

        #[test]
        fn impl_clone_boxed_long() {
            let s = "this string won't fit in a box".to_owned();
            let s: CowStr = s.into();
            assert!(matches!(s, CowStr::Boxed(_)), "Expected Boxed case");

            let s2 = s.clone();
            assert_eq!(s.deref(), s2.deref());
            assert!(matches!(s2, CowStr::Boxed(_)), "Expected Boxed clone");
        }

        #[test]
        fn impl_clone_borrowed() {
            let s = "this long string is borrowed";
            let s: CowStr = s.into();
            assert!(matches!(s, CowStr::Borrowed(_)), "Expected Borrowed case");

            let s2 = s.clone();
            assert_eq!(s.deref(), s2.deref());
            assert!(matches!(s2, CowStr::Borrowed(_)), "Expected Borrowed clone");
        }

        #[test]
        fn impl_clone_inlined() {
            let s: CowStr = 's'.into();
            assert!(matches!(s, CowStr::Inlined(_)), "Expected Inlined case");

            let s2 = s.clone();
            assert_eq!(s.deref(), s2.deref());
            assert!(matches!(s2, CowStr::Inlined(_)), "Expected Inlined clone");
        }

        #[test]
        fn impl_hash() {
            use std::{
                collections::hash_map::DefaultHasher,
                hash::{Hash, Hasher},
            };

            let mut hasher = DefaultHasher::new();
            "🍔".hash(&mut hasher);
            let expected = hasher.finish();

            let s: CowStr = '🍔'.into();
            assert!(matches!(s, CowStr::Inlined(_)), "Expected Inlined case");
            let mut hasher = DefaultHasher::new();
            s.hash(&mut hasher);
            let actual = hasher.finish();
            assert_eq!(expected, actual);

            let s = CowStr::Borrowed("🍔");
            let mut hasher = DefaultHasher::new();
            s.hash(&mut hasher);
            let actual = hasher.finish();
            assert_eq!(expected, actual);

            let s = "🍔".to_owned();
            let s: CowStr = s.into();
            assert!(matches!(s, CowStr::Boxed(_)), "Expected Boxed case");
            let mut hasher = DefaultHasher::new();
            s.hash(&mut hasher);
            // Read the hash of the Boxed value; without this the assertion
            // below would just re-check the Borrowed result from above.
            let actual = hasher.finish();
            assert_eq!(expected, actual);
        }

        #[test]
        fn impl_from_str() {
            let s = "xyz";
            let s: CowStr = s.into();
            assert_eq!(s.deref(), "xyz");
            assert!(matches!(s, CowStr::Borrowed(_)), "Expected Borrowed case");
        }

        #[test]
        fn impl_borrow() {
            let s: CowStr = "xyz".into();
            let s: &str = s.borrow();
            assert_eq!(s, "xyz");
        }

        #[test]
        fn into_string_boxed() {
            let s = "this string won't fit in a box".to_owned();
            let s: CowStr = s.into();
            assert!(matches!(s, CowStr::Boxed(_)), "Expected Boxed case");

            let s2 = s.into_string();
            assert_eq!(&s2, "this string won't fit in a box");
        }

        #[test]
        fn into_string_borrowed() {
            let s = "this long string is borrowed";
            let s: CowStr = s.into();
            assert!(matches!(s, CowStr::Borrowed(_)), "Expected Borrowed case");

            let s2 = s.into_string();
            assert_eq!(&s2, "this long string is borrowed");
        }

        #[test]
        fn into_string_inlined() {
            let s: CowStr = 's'.into();
            assert!(matches!(s, CowStr::Inlined(_)), "Expected Inlined case");

            let s2 = s.into_string();
            assert_eq!(&s2, "s");
        }
    }
}