compact_strings/
compact_strings.rs

1use core::{
2    fmt::Debug,
3    ops::{Deref, Index},
4};
5
6use crate::CompactBytestrings;
7
8/// A more compact but limited representation of a list of strings.
9///
10/// Strings are stored contiguously in a vector of bytes, with their lengths and starting indices
11/// being stored separately.
12///
13/// Limitations include being unable to mutate strings stored in the vector.
14///
15/// # Examples
16/// ```
17/// # use compact_strings::CompactStrings;
18/// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
19///
20/// cmpstrs.push("One");
21/// cmpstrs.push("Two");
22/// cmpstrs.push("Three");
23///
24/// cmpstrs.remove(1);
25///
26/// assert_eq!(cmpstrs.get(0), Some("One"));
27/// assert_eq!(cmpstrs.get(1), Some("Three"));
28/// assert_eq!(cmpstrs.get(2), None);
29/// ```
30#[repr(transparent)]
31#[derive(Clone)]
32pub struct CompactStrings(pub(crate) CompactBytestrings);
33
34impl CompactStrings {
35    /// Constructs a new, empty [`CompactStrings`].
36    ///
37    /// The [`CompactStrings`] will not allocate until strings are pushed into it.
38    ///
39    /// # Examples
40    /// ```
41    /// # use compact_strings::CompactStrings;
42    /// let mut cmpstrs = CompactStrings::new();
43    /// ```
44    #[must_use]
45    pub const fn new() -> Self {
46        Self(CompactBytestrings::new())
47    }
48
49    /// Constructs a new, empty [`CompactStrings`] with at least the specified capacities in each
50    /// vector.
51    ///
52    /// - `data_capacity`: The capacity of the data vector where the bytes of the strings are stored.
53    /// - `capacity_meta`: The capacity of the meta vector where the starting indices and lengths
54    /// of the strings are stored.
55    ///
56    /// The [`CompactStrings`] will be able to hold at least *`data_capacity`* bytes worth of strings
57    /// without reallocating the data vector, and at least *`capacity_meta`* of starting indices and
58    /// lengths without reallocating the meta vector. This method is allowed to allocate for more bytes
59    /// than the capacities. If a capacity is 0, the vector will not allocate.
60    ///
61    /// It is important to note that although the data and meta vectors have the
62    /// minimum capacities specified, they will have a zero *length*.
63    ///
64    /// If it is important to know the exact allocated capacity of the data vector, always use the
65    /// [`capacity`] method after construction.
66    ///
67    /// [`capacity`]: CompactStrings::capacity
68    ///
69    /// # Examples
70    /// ```
71    /// # use compact_strings::CompactStrings;
72    /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
73    ///
74    /// assert_eq!(cmpstrs.len(), 0);
75    /// assert!(cmpstrs.capacity() >= 20);
76    /// assert!(cmpstrs.capacity_meta() >= 3);
77    /// ```
78    #[must_use]
79    pub fn with_capacity(data_capacity: usize, capacity_meta: usize) -> Self {
80        Self(CompactBytestrings::with_capacity(
81            data_capacity,
82            capacity_meta,
83        ))
84    }
85
86    /// Appends a string to the back of the [`CompactStrings`].
87    ///
88    /// # Examples
89    /// ```
90    /// # use compact_strings::CompactStrings;
91    /// let mut cmpstrs = CompactStrings::new();
92    /// cmpstrs.push("One");
93    /// cmpstrs.push("Two");
94    /// cmpstrs.push("Three");
95    ///
96    /// assert_eq!(cmpstrs.get(0), Some("One"));
97    /// assert_eq!(cmpstrs.get(1), Some("Two"));
98    /// assert_eq!(cmpstrs.get(2), Some("Three"));
99    /// assert_eq!(cmpstrs.get(3), None);
100    /// ```
101    pub fn push<S>(&mut self, string: S)
102    where
103        S: Deref<Target = str>,
104    {
105        self.0.push(string.as_bytes());
106    }
107
108    /// Returns a reference to the string stored in the [`CompactStrings`] at that position.
109    ///
110    /// # Examples
111    /// ```
112    /// # use compact_strings::CompactStrings;
113    /// let mut cmpstrs = CompactStrings::new();
114    /// cmpstrs.push("One");
115    /// cmpstrs.push("Two");
116    /// cmpstrs.push("Three");
117    ///
118    /// assert_eq!(cmpstrs.get(0), Some("One"));
119    /// assert_eq!(cmpstrs.get(1), Some("Two"));
120    /// assert_eq!(cmpstrs.get(2), Some("Three"));
121    /// assert_eq!(cmpstrs.get(3), None);
122    /// ```
123    #[must_use]
124    pub fn get(&self, index: usize) -> Option<&str> {
125        let bytes = self.0.get(index)?;
126        if cfg!(feature = "no_unsafe") {
127            core::str::from_utf8(bytes).ok()
128        } else {
129            unsafe { Some(core::str::from_utf8_unchecked(bytes)) }
130        }
131    }
132
133    /// Returns a reference to the string stored in the [`CompactStrings`] at that position, without
134    /// doing bounds checking.
135    ///
136    /// # Safety
137    /// Calling this method with an out-of-bounds index is undefined behavior even if the resulting reference is not used.
138    ///
139    /// # Examples
140    /// ```
141    /// # use compact_strings::CompactStrings;
142    /// let mut cmpstrs = CompactStrings::new();
143    /// cmpstrs.push("One");
144    /// cmpstrs.push("Two");
145    /// cmpstrs.push("Three");
146    ///
147    /// unsafe {
148    ///     assert_eq!(cmpstrs.get_unchecked(0), "One");
149    ///     assert_eq!(cmpstrs.get_unchecked(1), "Two");
150    ///     assert_eq!(cmpstrs.get_unchecked(2), "Three");
151    /// }
152    /// ```
153    #[must_use]
154    #[cfg(not(feature = "no_unsafe"))]
155    pub unsafe fn get_unchecked(&self, index: usize) -> &str {
156        let bytes = self.0.get_unchecked(index);
157        core::str::from_utf8_unchecked(bytes)
158    }
159
160    /// Returns the number of strings in the [`CompactStrings`], also referred to as its 'length'.
161    ///
162    /// # Examples
163    /// ```
164    /// # use compact_strings::CompactStrings;
165    /// let mut cmpstrs = CompactStrings::new();
166    ///
167    /// cmpstrs.push("One");
168    /// cmpstrs.push("Two");
169    /// cmpstrs.push("Three");
170    ///
171    /// assert_eq!(cmpstrs.len(), 3);
172    /// ```
173    #[inline]
174    #[must_use]
175    pub fn len(&self) -> usize {
176        self.0.len()
177    }
178
179    /// Returns true if the [`CompactStrings`] contains no strings.
180    ///
181    /// # Examples
182    /// ```
183    /// # use compact_strings::CompactStrings;
184    /// let mut cmpstrs = CompactStrings::new();
185    /// assert!(cmpstrs.is_empty());
186    ///
187    /// cmpstrs.push("One");
188    ///
189    /// assert!(!cmpstrs.is_empty());
190    /// ```
191    #[inline]
192    #[must_use]
193    pub fn is_empty(&self) -> bool {
194        self.len() == 0
195    }
196
197    /// Returns the number of bytes the data vector can store without reallocating.
198    ///
199    /// # Examples
200    /// ```
201    /// # use compact_strings::CompactStrings;
202    /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
203    ///
204    /// cmpstrs.push("One");
205    ///
206    /// assert!(cmpstrs.capacity() >= 20);
207    /// ```
208    #[inline]
209    #[must_use]
210    pub fn capacity(&self) -> usize {
211        self.0.capacity()
212    }
213
214    /// Returns the number of starting indices and lengths can store without reallocating.
215    ///
216    /// # Examples
217    /// ```
218    /// # use compact_strings::CompactStrings;
219    /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
220    ///
221    /// cmpstrs.push("One");
222    /// cmpstrs.push("Two");
223    /// cmpstrs.push("Three");
224    /// assert!(cmpstrs.capacity_meta() >= 3);
225    ///
226    /// cmpstrs.push("Three");
227    /// assert!(cmpstrs.capacity_meta() > 3);
228    /// ```
229    #[inline]
230    #[must_use]
231    pub fn capacity_meta(&self) -> usize {
232        self.0.capacity_meta()
233    }
234
235    /// Clears the [`CompactStrings`], removing all strings.
236    ///
237    /// Note that this method has no effect on the allocated capacity of the vectors.
238    ///
239    /// # Examples
240    /// ```
241    /// # use compact_strings::CompactStrings;
242    /// let mut cmpstrs = CompactStrings::new();
243    ///
244    /// cmpstrs.push("One");
245    /// cmpstrs.push("Two");
246    /// cmpstrs.push("Three");
247    /// cmpstrs.clear();
248    ///
249    /// assert!(cmpstrs.is_empty());
250    /// ```
251    pub fn clear(&mut self) {
252        self.0.clear();
253    }
254
255    /// Shrinks the capacity of the data vector, which stores the bytes of the held strings, as much as possible.
256    ///
257    /// It will drop down as close as possible to the length but the allocator
258    /// may still inform the vector that there is space for a few more elements.
259    ///
260    /// # Examples
261    /// ```
262    /// # use compact_strings::CompactStrings;
263    /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
264    ///
265    /// cmpstrs.push("One");
266    /// cmpstrs.push("Two");
267    /// cmpstrs.push("Three");
268    ///
269    /// assert!(cmpstrs.capacity() >= 20);
270    /// cmpstrs.shrink_to_fit();
271    /// assert!(cmpstrs.capacity() >= 3);
272    /// ```
273    #[inline]
274    pub fn shrink_to_fit(&mut self) {
275        self.0.shrink_to_fit();
276    }
277
278    /// Shrinks the capacity of the info vector, which stores the starting indices and lengths of
279    /// the held strings, as much as possible.
280    ///
281    /// It will drop down as close as possible to the length but the allocator
282    /// may still inform the vector that there is space for a few more elements.
283    ///
284    /// # Examples
285    /// ```
286    /// # use compact_strings::CompactStrings;
287    /// let mut cmpstrs = CompactStrings::with_capacity(20, 10);
288    ///
289    /// cmpstrs.push("One");
290    /// cmpstrs.push("Two");
291    /// cmpstrs.push("Three");
292    ///
293    /// assert!(cmpstrs.capacity_meta() >= 10);
294    /// cmpstrs.shrink_to_fit();
295    /// assert!(cmpstrs.capacity_meta() >= 3);
296    /// ```
297    #[inline]
298    pub fn shrink_meta_to_fit(&mut self) {
299        self.0.shrink_meta_to_fit();
300    }
301
302    /// Shrinks the capacity of the data vector, which stores the bytes of the held strings, with a lower bound.
303    ///
304    /// The capacity will remain at least as large as both the length and the supplied value.
305    ///
306    /// If the current capacity is less than the lower limit, this is a no-op.
307    ///
308    /// # Examples
309    /// ```
310    /// # use compact_strings::CompactStrings;
311    /// let mut cmpstrs = CompactStrings::with_capacity(20, 4);
312    ///
313    /// cmpstrs.push("One");
314    /// cmpstrs.push("Two");
315    /// cmpstrs.push("Three");
316    ///
317    /// assert!(cmpstrs.capacity() >= 20);
318    /// cmpstrs.shrink_to(4);
319    /// assert!(cmpstrs.capacity() >= 4);
320    /// ```
321    #[inline]
322    pub fn shrink_to(&mut self, min_capacity: usize) {
323        self.0.shrink_to(min_capacity);
324    }
325
326    /// Shrinks the capacity of the meta vector, which starting indices and lengths of the held strings,
327    /// with a lower bound.
328    ///
329    /// The capacity will remain at least as large as both the length and the supplied value.
330    ///
331    /// If the current capacity is less than the lower limit, this is a no-op.
332    ///
333    /// # Examples
334    /// ```
335    /// # use compact_strings::CompactStrings;
336    /// let mut cmpstrs = CompactStrings::with_capacity(20, 10);
337    ///
338    /// cmpstrs.push("One");
339    /// cmpstrs.push("Two");
340    /// cmpstrs.push("Three");
341    ///
342    /// assert!(cmpstrs.capacity_meta() >= 10);
343    /// cmpstrs.shrink_meta_to(4);
344    /// assert!(cmpstrs.capacity_meta() >= 4);
345    /// ```
346    #[inline]
347    pub fn shrink_meta_to(&mut self, min_capacity: usize) {
348        self.0.shrink_meta_to(min_capacity);
349    }
350
351    /// Removes the data pointing to where the string at the specified index is stored.
352    ///
353    /// Note: This does not remove the bytes of the string from memory, you may want to use
354    /// [`remove`] if you desire that behavior.
355    ///
356    /// Note: Because this shifts over the remaining elements in the meta vector, it has a
357    /// worst-case performance of *O*(*n*).
358    ///
359    /// [`remove`]: CompactStrings::remove
360    ///
361    /// # Examples
362    /// ```
363    /// # use compact_strings::CompactStrings;
364    /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
365    ///
366    /// cmpstrs.push("One");
367    /// cmpstrs.push("Two");
368    /// cmpstrs.push("Three");
369    ///
370    /// cmpstrs.ignore(1);
371    ///
372    /// assert_eq!(cmpstrs.get(0), Some("One"));
373    /// assert_eq!(cmpstrs.get(1), Some("Three"));
374    /// assert_eq!(cmpstrs.get(2), None);
375    /// ```
376    pub fn ignore(&mut self, index: usize) {
377        self.0.ignore(index);
378    }
379
380    /// Removes the bytes of the string and data pointing to the string is stored.
381    ///
382    /// Note: This does not shrink the vectors where the bytes of the string and data to the string
383    /// are stored. You may shrink the data vector with [`shrink_to`] and [`shrink_to_fit`] and the
384    /// meta vector with [`shrink_meta_to`] and [`shrink_meta_to_fit`].
385    ///
386    /// Note: Because this shifts over the remaining elements in both data and meta vectors, it
387    /// has a worst-case performance of *O*(*n*). If you don't need the bytes of the string to
388    /// be removed, use [`ignore`] instead.
389    ///
390    /// [`shrink_to`]: CompactStrings::shrink_to
391    /// [`shrink_to_fit`]: CompactStrings::shrink_to_fit
392    /// [`shrink_meta_to`]: CompactStrings::shrink_meta_to
393    /// [`shrink_meta_to_fit`]: CompactStrings::shrink_meta_to_fit
394    /// [`ignore`]: CompactStrings::ignore
395    ///
396    /// # Examples
397    /// ```
398    /// # use compact_strings::CompactStrings;
399    /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
400    ///
401    /// cmpstrs.push("One");
402    /// cmpstrs.push("Two");
403    /// cmpstrs.push("Three");
404    ///
405    /// cmpstrs.remove(1);
406    ///
407    /// assert_eq!(cmpstrs.get(0), Some("One"));
408    /// assert_eq!(cmpstrs.get(1), Some("Three"));
409    /// assert_eq!(cmpstrs.get(2), None);
410    /// ```
411    pub fn remove(&mut self, index: usize) {
412        self.0.remove(index);
413    }
414
415    /// Returns an iterator over the slice.
416    ///
417    /// The iterator yields all items from start to end.
418    ///
419    /// # Examples
420    ///
421    /// ```
422    /// # use compact_strings::CompactStrings;
423    /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
424    /// cmpstrs.push("One");
425    /// cmpstrs.push("Two");
426    /// cmpstrs.push("Three");
427    /// let mut iterator = cmpstrs.iter();
428    ///
429    /// assert_eq!(iterator.next(), Some("One"));
430    /// assert_eq!(iterator.next(), Some("Two"));
431    /// assert_eq!(iterator.next(), Some("Three"));
432    /// assert_eq!(iterator.next(), None);
433    /// ```
434    #[inline]
435    #[must_use]
436    pub fn iter(&self) -> Iter<'_> {
437        Iter(self.0.iter())
438    }
439}
440
441impl PartialEq for CompactStrings {
442    fn eq(&self, other: &Self) -> bool {
443        let len = self.len();
444        if len != other.len() {
445            return false;
446        }
447
448        for idx in 0..len {
449            if self[idx] != other[idx] {
450                return false;
451            }
452        }
453
454        true
455    }
456}
457
458impl Debug for CompactStrings {
459    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
460        f.debug_list().entries(self.iter()).finish()
461    }
462}
463
464impl<S> Extend<S> for CompactStrings
465where
466    S: Deref<Target = str>,
467{
468    #[inline]
469    fn extend<I: IntoIterator<Item = S>>(&mut self, iter: I) {
470        for s in iter {
471            self.push(s);
472        }
473    }
474}
475
476impl Index<usize> for CompactStrings {
477    type Output = str;
478
479    #[inline]
480    fn index(&self, index: usize) -> &Self::Output {
481        self.get(index).unwrap()
482    }
483}
484
485/// Iterator over strings in a [`CompactStrings`]
486///
487/// # Examples
488/// ```
489/// # use compact_strings::CompactStrings;
490/// let mut cmpstrs = CompactStrings::new();
491/// cmpstrs.push("One");
492/// cmpstrs.push("Two");
493/// cmpstrs.push("Three");
494///
495/// let mut iter = cmpstrs.into_iter();
496/// assert_eq!(iter.next(), Some("One"));
497/// assert_eq!(iter.next(), Some("Two"));
498/// assert_eq!(iter.next(), Some("Three"));
499/// assert_eq!(iter.next(), None);
500/// ```
501pub struct Iter<'a>(crate::compact_bytestrings::Iter<'a>);
502
503impl<'a> Iter<'a> {
504    pub fn new(inner: &'a CompactStrings) -> Self {
505        Self(inner.0.iter())
506    }
507
508    fn from_utf8_maybe_checked(bytes: &[u8]) -> Option<&str> {
509        if cfg!(feature = "no_unsafe") {
510            core::str::from_utf8(bytes).ok()
511        } else {
512            Some(unsafe { core::str::from_utf8_unchecked(bytes) })
513        }
514    }
515}
516
517impl<'a> Iterator for Iter<'a> {
518    type Item = &'a str;
519
520    fn next(&mut self) -> Option<Self::Item> {
521        self.0.next().and_then(Self::from_utf8_maybe_checked)
522    }
523
524    fn nth(&mut self, n: usize) -> Option<Self::Item> {
525        self.0.nth(n).and_then(Self::from_utf8_maybe_checked)
526    }
527
528    #[inline]
529    fn count(self) -> usize
530    where
531        Self: Sized,
532    {
533        self.len()
534    }
535
536    #[inline]
537    fn last(mut self) -> Option<Self::Item>
538    where
539        Self: Sized,
540    {
541        self.next_back()
542    }
543
544    #[inline]
545    fn size_hint(&self) -> (usize, Option<usize>) {
546        self.0.size_hint()
547    }
548}
549
550impl<'a> DoubleEndedIterator for Iter<'a> {
551    fn next_back(&mut self) -> Option<Self::Item> {
552        self.0.next_back().and_then(Self::from_utf8_maybe_checked)
553    }
554
555    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
556        self.0.nth_back(n).and_then(Self::from_utf8_maybe_checked)
557    }
558}
559
560impl ExactSizeIterator for Iter<'_> {
561    #[inline]
562    fn len(&self) -> usize {
563        self.0.len()
564    }
565}
566
567impl<'a> IntoIterator for &'a CompactStrings {
568    type Item = &'a str;
569
570    type IntoIter = Iter<'a>;
571
572    #[inline]
573    fn into_iter(self) -> Self::IntoIter {
574        self.iter()
575    }
576}
577
578impl<S> FromIterator<S> for CompactStrings
579where
580    S: Deref<Target = str>,
581{
582    fn from_iter<I: IntoIterator<Item = S>>(iter: I) -> Self {
583        let iter = iter.into_iter();
584        let meta_capacity = match iter.size_hint() {
585            (a, Some(b)) if a == b => a,
586            _ => 0,
587        };
588
589        let mut out = CompactStrings::with_capacity(0, meta_capacity);
590        for s in iter {
591            out.push(s);
592        }
593
594        out
595    }
596}
597
598impl<S, I> From<I> for CompactStrings
599where
600    S: Deref<Target = str>,
601    I: IntoIterator<Item = S>,
602{
603    #[inline]
604    fn from(value: I) -> Self {
605        FromIterator::from_iter(value)
606    }
607}
608
609impl TryFrom<CompactBytestrings> for CompactStrings {
610    type Error = core::str::Utf8Error;
611
612    fn try_from(value: CompactBytestrings) -> Result<Self, Self::Error> {
613        for bstr in &value {
614            let _ = core::str::from_utf8(bstr)?;
615        }
616
617        Ok(Self(value))
618    }
619}
620
#[cfg(test)]
mod tests {
    use crate::CompactStrings;

    /// `len()` of the iterator must count down with every `next()` and then stay at zero.
    #[test]
    fn exact_size_iterator() {
        let mut cmpstrs = CompactStrings::new();
        for &word in &["One", "Two", "Three"] {
            cmpstrs.push(word);
        }

        let mut iter = cmpstrs.iter();
        for &remaining in &[3, 2, 1, 0] {
            assert_eq!(iter.len(), remaining);
            let _ = iter.next();
        }
        // One `next()` past the end must not change the reported length.
        assert_eq!(iter.len(), 0);
    }

    /// Alternating between both ends must meet in the middle without repeating an element.
    #[test]
    fn double_ended_iterator() {
        let mut cmpstrs = CompactStrings::new();
        for &word in &["One", "Two", "Three", "Four"] {
            cmpstrs.push(word);
        }

        let mut iter = cmpstrs.iter();
        assert_eq!(iter.next(), Some("One"));
        assert_eq!(iter.next_back(), Some("Four"));
        assert_eq!(iter.next(), Some("Two"));
        assert_eq!(iter.next_back(), Some("Three"));
        assert_eq!(iter.next(), None);
        assert_eq!(iter.next_back(), None);
    }
}
663
#[cfg(feature = "serde")]
mod serde {
    use serde::{
        de::{DeserializeSeed, SeqAccess, Visitor},
        Deserialize, Deserializer, Serialize,
    };

    use crate::CompactStrings;

    /// Upper bound on the number of meta entries reserved up front from a deserializer's size
    /// hint. The hint may come straight from untrusted input, so it must not be able to
    /// trigger an arbitrarily large allocation; longer sequences simply grow as needed.
    const MAX_PREALLOCATED_ENTRIES: usize = 4096;

    /// Serializes the collection as a sequence of strings.
    impl Serialize for CompactStrings {
        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            serializer.collect_seq(self)
        }
    }

    /// Deserializes the collection from a sequence of strings.
    impl<'de> Deserialize<'de> for CompactStrings {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            deserializer.deserialize_seq(CompactStringsVisitor)
        }
    }

    /// Visitor that collects a sequence of strings into a [`CompactStrings`].
    struct CompactStringsVisitor;

    impl<'de> Visitor<'de> for CompactStringsVisitor {
        type Value = CompactStrings;

        fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result {
            formatter.write_str("an array of strings")
        }

        #[inline]
        fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
        where
            A: SeqAccess<'de>,
        {
            let meta_capacity = seq
                .size_hint()
                .unwrap_or_default()
                .min(MAX_PREALLOCATED_ENTRIES);
            let mut out = CompactStrings::with_capacity(0, meta_capacity);

            // Each element is pushed by the seed itself. Asking for `&str` elements instead
            // would only work with deserializers that can lend borrowed strings and would fail
            // for reader-based input or strings that need unescaping.
            while seq.next_element_seed(PushString(&mut out))?.is_some() {}

            Ok(out)
        }
    }

    /// Seed that deserializes one string and appends it to the wrapped [`CompactStrings`],
    /// accepting borrowed, transient and owned strings alike without an intermediate
    /// allocation of its own.
    struct PushString<'a>(&'a mut CompactStrings);

    impl<'de> DeserializeSeed<'de> for PushString<'_> {
        type Value = ();

        fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
        where
            D: Deserializer<'de>,
        {
            deserializer.deserialize_str(self)
        }
    }

    impl<'de> Visitor<'de> for PushString<'_> {
        type Value = ();

        fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result {
            formatter.write_str("a string")
        }

        // `visit_borrowed_str` and `visit_string` forward here by default.
        fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            self.0.push(v);
            Ok(())
        }

        // Kept for compatibility with formats that hand strings over as raw bytes; the bytes
        // are only accepted if they are valid UTF-8.
        fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            match core::str::from_utf8(v) {
                Ok(string) => self.visit_str(string),
                Err(_) => Err(E::invalid_value(serde::de::Unexpected::Bytes(v), &self)),
            }
        }
    }
}
711
712#[cfg(feature = "serde")]
713#[cfg_attr(feature = "serde", allow(unused_imports))]
714#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
715pub use self::serde::*;