Skip to main content

asciidoc_parser/
strings.rs

1// Adapted from pulldown-cmark, which comes with the following license:
2//
3// Copyright 2015 Google Inc. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in
13// all copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21// THE SOFTWARE.
22
23//! String types that facilitate parsing.
24
25use std::{
26    borrow::{Borrow, Cow},
27    fmt,
28    hash::{Hash, Hasher},
29    ops::Deref,
30    str::from_utf8,
31};
32
/// Largest number of UTF-8 bytes an `InlineStr` can hold: three machine
/// words minus two bytes.
pub(crate) const MAX_INLINE_STR_LEN: usize = std::mem::size_of::<isize>() * 3 - 2;
34
/// Returned when trying to convert a `&str` into an [`InlineStr`] but it fails
/// because it doesn't fit.
///
/// Implements [`std::error::Error`], so it can be boxed into
/// `Box<dyn Error>` or propagated with `?` like any other error.
#[derive(Debug)]
pub struct StringTooLongError;

impl fmt::Display for StringTooLongError {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("string is too long to be stored inline")
    }
}

impl std::error::Error for StringTooLongError {}
39
40/// An inline string that can contain almost three words
41/// of UTF-8 text.
42#[derive(Debug, Clone, Copy, Eq)]
43pub struct InlineStr {
44    inner: [u8; MAX_INLINE_STR_LEN],
45    len: u8,
46}
47
48impl AsRef<str> for InlineStr {
49    fn as_ref(&self) -> &str {
50        self.deref()
51    }
52}
53
54impl Hash for InlineStr {
55    fn hash<H: Hasher>(&self, state: &mut H) {
56        self.deref().hash(state);
57    }
58}
59
60impl From<char> for InlineStr {
61    fn from(c: char) -> Self {
62        let mut inner = [0u8; MAX_INLINE_STR_LEN];
63        c.encode_utf8(&mut inner);
64        let len = c.len_utf8() as u8;
65        Self { inner, len }
66    }
67}
68
69impl std::cmp::PartialEq<InlineStr> for InlineStr {
70    fn eq(&self, other: &InlineStr) -> bool {
71        self.deref() == other.deref()
72    }
73}
74
75impl TryFrom<&str> for InlineStr {
76    type Error = StringTooLongError;
77
78    fn try_from(s: &str) -> Result<InlineStr, StringTooLongError> {
79        let len = s.len();
80        if len <= MAX_INLINE_STR_LEN {
81            let mut inner = [0u8; MAX_INLINE_STR_LEN];
82
83            debug_assert!(
84                len <= MAX_INLINE_STR_LEN,
85                "InlineStr: len {} exceeds MAX_INLINE_STR_LEN {}",
86                len,
87                MAX_INLINE_STR_LEN
88            );
89
90            if let Some(dest) = inner.get_mut(..len) {
91                dest.copy_from_slice(s.as_bytes());
92            }
93
94            let len = len as u8;
95            Ok(Self { inner, len })
96        } else {
97            Err(StringTooLongError)
98        }
99    }
100}
101
102impl Deref for InlineStr {
103    type Target = str;
104
105    fn deref(&self) -> &str {
106        let len = self.len as usize;
107        self.inner
108            .get(..len)
109            .and_then(|bytes| from_utf8(bytes).ok())
110            .unwrap_or_default()
111    }
112}
113
114impl fmt::Display for InlineStr {
115    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116        write!(f, "{}", self.as_ref())
117    }
118}
119
120/// A copy-on-write string that can be owned, borrowed,
121/// or inlined.
122///
123/// It is three words long.
124///
125/// NOTE: The [`Debug`] implementation for this struct elides the storage
126/// mechanism that is chosen when pretty printing (as occurs when using the
127/// `dbg!()` macro. To obtain that information, use the β€œnormal” debug
128/// formatting as shown below:
129///
130/// ```
131/// # use asciidoc_parser::strings::CowStr;
132///
133/// let s: &'static str = "0123456789abcdefghijklm";
134/// let s: CowStr = s.into();
135/// assert_eq!(
136///     format!("{s:?}"),
137///     "CowStr::Borrowed(\"0123456789abcdefghijklm\")"
138/// );
139/// ```
140#[derive(Eq)]
141pub enum CowStr<'a> {
142    /// An owned, immutable string.
143    Boxed(Box<str>),
144    /// A borrowed string.
145    Borrowed(&'a str),
146    /// A short inline string.
147    Inlined(InlineStr),
148}
149
150impl AsRef<str> for CowStr<'_> {
151    fn as_ref(&self) -> &str {
152        self.deref()
153    }
154}
155
156impl Hash for CowStr<'_> {
157    fn hash<H: Hasher>(&self, state: &mut H) {
158        self.deref().hash(state);
159    }
160}
161
162impl std::clone::Clone for CowStr<'_> {
163    fn clone(&self) -> Self {
164        match self {
165            CowStr::Boxed(s) => match InlineStr::try_from(&**s) {
166                Ok(inline) => CowStr::Inlined(inline),
167                Err(..) => CowStr::Boxed(s.clone()),
168            },
169            CowStr::Borrowed(s) => CowStr::Borrowed(s),
170            CowStr::Inlined(s) => CowStr::Inlined(*s),
171        }
172    }
173}
174
175impl<'a> std::cmp::PartialEq<CowStr<'a>> for CowStr<'a> {
176    fn eq(&self, other: &CowStr<'_>) -> bool {
177        self.deref() == other.deref()
178    }
179}
180
181impl<'a> From<&'a str> for CowStr<'a> {
182    fn from(s: &'a str) -> Self {
183        CowStr::Borrowed(s)
184    }
185}
186
187impl From<String> for CowStr<'_> {
188    fn from(s: String) -> Self {
189        CowStr::Boxed(s.into_boxed_str())
190    }
191}
192
193impl From<char> for CowStr<'_> {
194    fn from(c: char) -> Self {
195        CowStr::Inlined(c.into())
196    }
197}
198
199impl<'a> From<Cow<'a, str>> for CowStr<'a> {
200    fn from(s: Cow<'a, str>) -> Self {
201        match s {
202            Cow::Borrowed(s) => CowStr::Borrowed(s),
203            Cow::Owned(s) => CowStr::Boxed(s.into_boxed_str()),
204        }
205    }
206}
207
208impl<'a> From<CowStr<'a>> for Cow<'a, str> {
209    fn from(s: CowStr<'a>) -> Self {
210        match s {
211            CowStr::Boxed(s) => Cow::Owned(s.to_string()),
212            CowStr::Inlined(s) => Cow::Owned(s.to_string()),
213            CowStr::Borrowed(s) => Cow::Borrowed(s),
214        }
215    }
216}
217
218impl<'a> From<Cow<'a, char>> for CowStr<'a> {
219    fn from(s: Cow<'a, char>) -> Self {
220        CowStr::Inlined(InlineStr::from(*s))
221    }
222}
223
224impl Deref for CowStr<'_> {
225    type Target = str;
226
227    fn deref(&self) -> &str {
228        match self {
229            CowStr::Boxed(b) => b,
230            CowStr::Borrowed(b) => b,
231            CowStr::Inlined(s) => s.deref(),
232        }
233    }
234}
235
236impl Borrow<str> for CowStr<'_> {
237    fn borrow(&self) -> &str {
238        self.deref()
239    }
240}
241
242impl CowStr<'_> {
243    /// Convert the `CowStr` into an owned `String`.
244    pub fn into_string(self) -> String {
245        match self {
246            CowStr::Boxed(b) => b.into(),
247            CowStr::Borrowed(b) => b.to_owned(),
248            CowStr::Inlined(s) => s.deref().to_owned(),
249        }
250    }
251}
252
253impl fmt::Display for CowStr<'_> {
254    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
255        write!(f, "{}", self.as_ref())
256    }
257}
258
259impl fmt::Debug for CowStr<'_> {
260    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
261        if f.alternate() {
262            write!(f, "{:?}", self.as_ref())
263        } else {
264            match self {
265                Self::Boxed(b) => f.debug_tuple("CowStr::Boxed").field(b).finish(),
266                Self::Borrowed(b) => f.debug_tuple("CowStr::Borrowed").field(b).finish(),
267                Self::Inlined(s) => f.debug_tuple("CowStr::Inlined").field(s).finish(),
268            }
269        }
270    }
271}
272
273#[cfg(test)]
274mod tests {
275    #![allow(clippy::panic)]
276    #![allow(clippy::unwrap_used)]
277
278    mod string_too_long_err {
279        use crate::strings::StringTooLongError;
280
281        #[test]
282        fn impl_debug() {
283            let e = StringTooLongError;
284            assert_eq!(format!("{e:#?}"), "StringTooLongError");
285        }
286    }
287
288    mod inline_str {
289        use std::ops::Deref;
290
291        use crate::strings::*;
292
293        #[test]
294        fn from_ascii() {
295            let s: InlineStr = 'a'.into();
296            assert_eq!("a", s.deref());
297        }
298
299        #[test]
300        fn from_unicode() {
301            let s: InlineStr = 'πŸ”'.into();
302            assert_eq!("πŸ”", s.deref());
303        }
304
305        #[test]
306        fn impl_debug() {
307            let s: InlineStr = 'a'.into();
308            assert_eq!(
309                format!("{s:#?}"),
310                r#"InlineStr {
311    inner: [
312        97,
313        0,
314        0,
315        0,
316        0,
317        0,
318        0,
319        0,
320        0,
321        0,
322        0,
323        0,
324        0,
325        0,
326        0,
327        0,
328        0,
329        0,
330        0,
331        0,
332        0,
333        0,
334    ],
335    len: 1,
336}"#
337            );
338        }
339
340        #[test]
341        fn impl_hash() {
342            use std::{
343                collections::hash_map::DefaultHasher,
344                hash::{Hash, Hasher},
345            };
346
347            let mut hasher = DefaultHasher::new();
348            "πŸ”".hash(&mut hasher);
349            let expected = hasher.finish();
350
351            let s: InlineStr = 'πŸ”'.into();
352            let mut hasher = DefaultHasher::new();
353            s.hash(&mut hasher);
354            let actual = hasher.finish();
355
356            let s: InlineStr = 'a'.into();
357            let mut hasher = DefaultHasher::new();
358            s.hash(&mut hasher);
359            let mismatch = hasher.finish();
360
361            assert_eq!(expected, actual);
362            assert_ne!(expected, mismatch);
363        }
364
365        #[test]
366        fn impl_partial_eq() {
367            let s1: InlineStr = 'πŸ”'.into();
368            let s2: InlineStr = 'πŸ”'.into();
369            let s3: InlineStr = 'a'.into();
370
371            assert_eq!(s1, s2);
372            assert_ne!(s1, s3);
373        }
374
375        #[test]
376        #[allow(clippy::assertions_on_constants)]
377        fn max_len_atleast_four() {
378            // we need 4 bytes to store a char
379            assert!(MAX_INLINE_STR_LEN >= 4);
380        }
381
382        #[test]
383        #[cfg(target_pointer_width = "64")]
384        fn fits_twentytwo() {
385            let s = "0123456789abcdefghijkl";
386            let stack_str = InlineStr::try_from(s).unwrap();
387            assert_eq!(stack_str.deref(), s);
388        }
389
390        #[test]
391        #[cfg(target_pointer_width = "64")]
392        fn doesnt_fit_twentythree() {
393            let s = "0123456789abcdefghijklm";
394            let _stack_str = InlineStr::try_from(s).unwrap_err();
395        }
396    }
397
398    mod cow_str {
399        use std::{
400            borrow::{Borrow, Cow},
401            ops::Deref,
402        };
403
404        use crate::strings::*;
405
406        #[test]
407        fn size() {
408            let size = std::mem::size_of::<CowStr>();
409            let word_size = std::mem::size_of::<isize>();
410            assert_eq!(3 * word_size, size);
411        }
412
413        #[test]
414        fn char_to_string() {
415            let c = '藏';
416            let smort: CowStr = c.into();
417            let owned: String = smort.to_string();
418            let expected = "藏".to_owned();
419            assert_eq!(expected, owned);
420        }
421
422        #[test]
423        #[cfg(target_pointer_width = "64")]
424        fn small_boxed_str_clones_to_stack() {
425            let s = "0123456789abcde".to_owned();
426            let smort: CowStr = s.into();
427            let smort_clone = smort.clone();
428
429            if let CowStr::Inlined(..) = smort_clone {
430            } else {
431                panic!("Expected a Inlined variant!");
432            }
433        }
434
435        #[test]
436        fn cow_to_cow_str() {
437            let s = "some text";
438            let cow = Cow::Borrowed(s);
439            let actual = CowStr::from(cow);
440            let expected = CowStr::Borrowed(s);
441            assert_eq!(actual, expected);
442            assert!(variant_eq(&actual, &expected));
443
444            let s = "some text".to_string();
445            let cow: Cow<str> = Cow::Owned(s.clone());
446            let actual = CowStr::from(cow);
447            let expected = CowStr::Boxed(s.into_boxed_str());
448            assert_eq!(actual, expected);
449            assert!(variant_eq(&actual, &expected));
450        }
451
452        #[test]
453        fn cow_str_to_cow() {
454            let s = "some text";
455            let cow_str = CowStr::Borrowed(s);
456            let actual = Cow::from(cow_str);
457            let expected = Cow::Borrowed(s);
458            assert_eq!(actual, expected);
459            assert!(variant_eq(&actual, &expected));
460
461            let s = "s";
462            let inline_str: InlineStr = InlineStr::try_from(s).unwrap();
463            let cow_str = CowStr::Inlined(inline_str);
464            let actual = Cow::from(cow_str);
465            let expected: Cow<str> = Cow::Owned(s.to_string());
466            assert_eq!(actual, expected);
467            assert!(variant_eq(&actual, &expected));
468
469            let s = "s";
470            let cow_str = CowStr::Boxed(s.to_string().into_boxed_str());
471            let actual = Cow::from(cow_str);
472            let expected: Cow<str> = Cow::Owned(s.to_string());
473            assert_eq!(actual, expected);
474            assert!(variant_eq(&actual, &expected));
475        }
476
477        #[test]
478        fn cow_char_to_cow_str() {
479            let c = 'c';
480            let cow: Cow<char> = Cow::Owned(c);
481            let actual = CowStr::from(cow);
482            let expected = CowStr::Inlined(InlineStr::from(c));
483            assert_eq!(actual, expected);
484            assert!(variant_eq(&actual, &expected));
485
486            let c = 'c';
487            let cow: Cow<char> = Cow::Borrowed(&c);
488            let actual = CowStr::from(cow);
489            let expected = CowStr::Inlined(InlineStr::from(c));
490            assert_eq!(actual, expected);
491            assert!(variant_eq(&actual, &expected));
492        }
493
494        fn variant_eq<T>(a: &T, b: &T) -> bool {
495            std::mem::discriminant(a) == std::mem::discriminant(b)
496        }
497
498        #[test]
499        fn impl_debug_pretty_print_for_inline() {
500            let c = '藏';
501            let s: CowStr = c.into();
502
503            assert_eq!(format!("{s:#?}"), r#""藏""#);
504        }
505
506        #[test]
507        fn impl_debug_pretty_print_for_boxed() {
508            let s = "blah blah blah".to_string();
509            let s: CowStr = s.into();
510
511            assert_eq!(format!("{s:#?}"), r#""blah blah blah""#);
512        }
513
514        #[test]
515        fn impl_debug_pretty_print_for_borrowed() {
516            let s: &'static str = "0123456789abcdefghijklm";
517            let s: CowStr = s.into();
518
519            assert_eq!(format!("{s:#?}"), r#""0123456789abcdefghijklm""#);
520        }
521
522        #[test]
523        fn impl_debug_for_inline() {
524            let c = '藏';
525            let s: CowStr = c.into();
526
527            assert_eq!(
528                format!("{s:?}"),
529                "CowStr::Inlined(InlineStr { inner: [232, 151, 143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], len: 3 })"
530            );
531        }
532
533        #[test]
534        fn impl_debug_for_boxed() {
535            let s = "blah blah blah".to_string();
536            let s: CowStr = s.into();
537
538            assert_eq!(format!("{s:?}"), "CowStr::Boxed(\"blah blah blah\")");
539        }
540
541        #[test]
542        fn impl_debug_for_borrowed() {
543            let s: &'static str = "0123456789abcdefghijklm";
544            let s: CowStr = s.into();
545
546            assert_eq!(
547                format!("{s:?}"),
548                "CowStr::Borrowed(\"0123456789abcdefghijklm\")"
549            );
550        }
551
552        #[test]
553        fn impl_clone_boxed_long() {
554            let s = "this string won't fit in a box".to_owned();
555            let s: CowStr = s.into();
556            if let CowStr::Boxed(_) = s {
557            } else {
558                panic!("Expected Boxed case");
559            }
560
561            let s2 = s.clone();
562            assert_eq!(s.deref(), s2.deref());
563
564            if let CowStr::Boxed(_) = s2 {
565            } else {
566                panic!("Expected Boxed clone");
567            }
568        }
569
570        #[test]
571        fn impl_clone_borrowed() {
572            let s = "this long string is borrowed";
573            let s: CowStr = s.into();
574            if let CowStr::Borrowed(_) = s {
575            } else {
576                panic!("Expected Borrowed case");
577            }
578
579            let s2 = s.clone();
580            assert_eq!(s.deref(), s2.deref());
581
582            if let CowStr::Borrowed(_) = s2 {
583            } else {
584                panic!("Expected Borrowed clone");
585            }
586        }
587
588        #[test]
589        fn impl_clone_inlined() {
590            let s: CowStr = 's'.into();
591            if let CowStr::Inlined(_) = s {
592            } else {
593                panic!("Expected Inlined case");
594            }
595
596            let s2 = s.clone();
597            assert_eq!(s.deref(), s2.deref());
598
599            if let CowStr::Inlined(_) = s2 {
600            } else {
601                panic!("Expected Inlined clone");
602            }
603        }
604
605        #[test]
606        fn impl_hash() {
607            use std::{
608                collections::hash_map::DefaultHasher,
609                hash::{Hash, Hasher},
610            };
611
612            let mut hasher = DefaultHasher::new();
613            "πŸ”".hash(&mut hasher);
614            let expected = hasher.finish();
615
616            let s: CowStr = 'πŸ”'.into();
617            if let CowStr::Inlined(_) = s {
618            } else {
619                panic!("Expected Inlined case");
620            }
621            let mut hasher = DefaultHasher::new();
622            s.hash(&mut hasher);
623            let actual = hasher.finish();
624            assert_eq!(expected, actual);
625
626            let s = CowStr::Borrowed("πŸ”");
627            let mut hasher = DefaultHasher::new();
628            s.hash(&mut hasher);
629            let actual = hasher.finish();
630            assert_eq!(expected, actual);
631
632            let s = "πŸ”".to_owned();
633            let s: CowStr = s.into();
634            if let CowStr::Boxed(_) = s {
635            } else {
636                panic!("Expected Boxed case");
637            }
638            let mut hasher = DefaultHasher::new();
639            s.hash(&mut hasher);
640            assert_eq!(expected, actual);
641        }
642
643        #[test]
644        fn impl_from_str() {
645            let s = "xyz";
646            let s: CowStr = s.into();
647            assert_eq!(s.deref(), "xyz");
648
649            if let CowStr::Borrowed(_) = s {
650            } else {
651                panic!("Expected Borrowed case");
652            }
653        }
654
655        #[test]
656        fn impl_borrow() {
657            let s: CowStr = "xyz".into();
658            let s: &str = s.borrow();
659            assert_eq!(s, "xyz");
660        }
661
662        #[test]
663        fn into_string_boxed() {
664            let s = "this string won't fit in a box".to_owned();
665            let s: CowStr = s.into();
666            if let CowStr::Boxed(_) = s {
667            } else {
668                panic!("Expected Boxed case");
669            }
670
671            let s2 = s.into_string();
672            assert_eq!(&s2, "this string won't fit in a box");
673        }
674
675        #[test]
676        fn into_string_borrowed() {
677            let s = "this long string is borrowed";
678            let s: CowStr = s.into();
679            if let CowStr::Borrowed(_) = s {
680            } else {
681                panic!("Expected Borrowed case");
682            }
683
684            let s2 = s.into_string();
685            assert_eq!(&s2, "this long string is borrowed");
686        }
687
688        #[test]
689        fn into_string_inlined() {
690            let s: CowStr = 's'.into();
691            if let CowStr::Inlined(_) = s {
692            } else {
693                panic!("Expected Inlined case");
694            }
695
696            let s2 = s.into_string();
697            assert_eq!(&s2, "s");
698        }
699    }
700}