compact_strings/compact_strings.rs
1use core::{
2 fmt::Debug,
3 ops::{Deref, Index},
4};
5
6use crate::CompactBytestrings;
7
8/// A more compact but limited representation of a list of strings.
9///
10/// Strings are stored contiguously in a vector of bytes, with their lengths and starting indices
11/// being stored separately.
12///
13/// Limitations include being unable to mutate strings stored in the vector.
14///
15/// # Examples
16/// ```
17/// # use compact_strings::CompactStrings;
18/// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
19///
20/// cmpstrs.push("One");
21/// cmpstrs.push("Two");
22/// cmpstrs.push("Three");
23///
24/// cmpstrs.remove(1);
25///
26/// assert_eq!(cmpstrs.get(0), Some("One"));
27/// assert_eq!(cmpstrs.get(1), Some("Three"));
28/// assert_eq!(cmpstrs.get(2), None);
29/// ```
30#[repr(transparent)]
31#[derive(Clone)]
32pub struct CompactStrings(pub(crate) CompactBytestrings);
33
34impl CompactStrings {
35 /// Constructs a new, empty [`CompactStrings`].
36 ///
37 /// The [`CompactStrings`] will not allocate until strings are pushed into it.
38 ///
39 /// # Examples
40 /// ```
41 /// # use compact_strings::CompactStrings;
42 /// let mut cmpstrs = CompactStrings::new();
43 /// ```
44 #[must_use]
45 pub const fn new() -> Self {
46 Self(CompactBytestrings::new())
47 }
48
49 /// Constructs a new, empty [`CompactStrings`] with at least the specified capacities in each
50 /// vector.
51 ///
52 /// - `data_capacity`: The capacity of the data vector where the bytes of the strings are stored.
53 /// - `capacity_meta`: The capacity of the meta vector where the starting indices and lengths
54 /// of the strings are stored.
55 ///
56 /// The [`CompactStrings`] will be able to hold at least *`data_capacity`* bytes worth of strings
57 /// without reallocating the data vector, and at least *`capacity_meta`* of starting indices and
58 /// lengths without reallocating the meta vector. This method is allowed to allocate for more bytes
59 /// than the capacities. If a capacity is 0, the vector will not allocate.
60 ///
61 /// It is important to note that although the data and meta vectors have the
62 /// minimum capacities specified, they will have a zero *length*.
63 ///
64 /// If it is important to know the exact allocated capacity of the data vector, always use the
65 /// [`capacity`] method after construction.
66 ///
67 /// [`capacity`]: CompactStrings::capacity
68 ///
69 /// # Examples
70 /// ```
71 /// # use compact_strings::CompactStrings;
72 /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
73 ///
74 /// assert_eq!(cmpstrs.len(), 0);
75 /// assert!(cmpstrs.capacity() >= 20);
76 /// assert!(cmpstrs.capacity_meta() >= 3);
77 /// ```
78 #[must_use]
79 pub fn with_capacity(data_capacity: usize, capacity_meta: usize) -> Self {
80 Self(CompactBytestrings::with_capacity(
81 data_capacity,
82 capacity_meta,
83 ))
84 }
85
86 /// Appends a string to the back of the [`CompactStrings`].
87 ///
88 /// # Examples
89 /// ```
90 /// # use compact_strings::CompactStrings;
91 /// let mut cmpstrs = CompactStrings::new();
92 /// cmpstrs.push("One");
93 /// cmpstrs.push("Two");
94 /// cmpstrs.push("Three");
95 ///
96 /// assert_eq!(cmpstrs.get(0), Some("One"));
97 /// assert_eq!(cmpstrs.get(1), Some("Two"));
98 /// assert_eq!(cmpstrs.get(2), Some("Three"));
99 /// assert_eq!(cmpstrs.get(3), None);
100 /// ```
101 pub fn push<S>(&mut self, string: S)
102 where
103 S: Deref<Target = str>,
104 {
105 self.0.push(string.as_bytes());
106 }
107
108 /// Returns a reference to the string stored in the [`CompactStrings`] at that position.
109 ///
110 /// # Examples
111 /// ```
112 /// # use compact_strings::CompactStrings;
113 /// let mut cmpstrs = CompactStrings::new();
114 /// cmpstrs.push("One");
115 /// cmpstrs.push("Two");
116 /// cmpstrs.push("Three");
117 ///
118 /// assert_eq!(cmpstrs.get(0), Some("One"));
119 /// assert_eq!(cmpstrs.get(1), Some("Two"));
120 /// assert_eq!(cmpstrs.get(2), Some("Three"));
121 /// assert_eq!(cmpstrs.get(3), None);
122 /// ```
123 #[must_use]
124 pub fn get(&self, index: usize) -> Option<&str> {
125 let bytes = self.0.get(index)?;
126 if cfg!(feature = "no_unsafe") {
127 core::str::from_utf8(bytes).ok()
128 } else {
129 unsafe { Some(core::str::from_utf8_unchecked(bytes)) }
130 }
131 }
132
133 /// Returns a reference to the string stored in the [`CompactStrings`] at that position, without
134 /// doing bounds checking.
135 ///
136 /// # Safety
137 /// Calling this method with an out-of-bounds index is undefined behavior even if the resulting reference is not used.
138 ///
139 /// # Examples
140 /// ```
141 /// # use compact_strings::CompactStrings;
142 /// let mut cmpstrs = CompactStrings::new();
143 /// cmpstrs.push("One");
144 /// cmpstrs.push("Two");
145 /// cmpstrs.push("Three");
146 ///
147 /// unsafe {
148 /// assert_eq!(cmpstrs.get_unchecked(0), "One");
149 /// assert_eq!(cmpstrs.get_unchecked(1), "Two");
150 /// assert_eq!(cmpstrs.get_unchecked(2), "Three");
151 /// }
152 /// ```
153 #[must_use]
154 #[cfg(not(feature = "no_unsafe"))]
155 pub unsafe fn get_unchecked(&self, index: usize) -> &str {
156 let bytes = self.0.get_unchecked(index);
157 core::str::from_utf8_unchecked(bytes)
158 }
159
160 /// Returns the number of strings in the [`CompactStrings`], also referred to as its 'length'.
161 ///
162 /// # Examples
163 /// ```
164 /// # use compact_strings::CompactStrings;
165 /// let mut cmpstrs = CompactStrings::new();
166 ///
167 /// cmpstrs.push("One");
168 /// cmpstrs.push("Two");
169 /// cmpstrs.push("Three");
170 ///
171 /// assert_eq!(cmpstrs.len(), 3);
172 /// ```
173 #[inline]
174 #[must_use]
175 pub fn len(&self) -> usize {
176 self.0.len()
177 }
178
179 /// Returns true if the [`CompactStrings`] contains no strings.
180 ///
181 /// # Examples
182 /// ```
183 /// # use compact_strings::CompactStrings;
184 /// let mut cmpstrs = CompactStrings::new();
185 /// assert!(cmpstrs.is_empty());
186 ///
187 /// cmpstrs.push("One");
188 ///
189 /// assert!(!cmpstrs.is_empty());
190 /// ```
191 #[inline]
192 #[must_use]
193 pub fn is_empty(&self) -> bool {
194 self.len() == 0
195 }
196
197 /// Returns the number of bytes the data vector can store without reallocating.
198 ///
199 /// # Examples
200 /// ```
201 /// # use compact_strings::CompactStrings;
202 /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
203 ///
204 /// cmpstrs.push("One");
205 ///
206 /// assert!(cmpstrs.capacity() >= 20);
207 /// ```
208 #[inline]
209 #[must_use]
210 pub fn capacity(&self) -> usize {
211 self.0.capacity()
212 }
213
214 /// Returns the number of starting indices and lengths can store without reallocating.
215 ///
216 /// # Examples
217 /// ```
218 /// # use compact_strings::CompactStrings;
219 /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
220 ///
221 /// cmpstrs.push("One");
222 /// cmpstrs.push("Two");
223 /// cmpstrs.push("Three");
224 /// assert!(cmpstrs.capacity_meta() >= 3);
225 ///
226 /// cmpstrs.push("Three");
227 /// assert!(cmpstrs.capacity_meta() > 3);
228 /// ```
229 #[inline]
230 #[must_use]
231 pub fn capacity_meta(&self) -> usize {
232 self.0.capacity_meta()
233 }
234
235 /// Clears the [`CompactStrings`], removing all strings.
236 ///
237 /// Note that this method has no effect on the allocated capacity of the vectors.
238 ///
239 /// # Examples
240 /// ```
241 /// # use compact_strings::CompactStrings;
242 /// let mut cmpstrs = CompactStrings::new();
243 ///
244 /// cmpstrs.push("One");
245 /// cmpstrs.push("Two");
246 /// cmpstrs.push("Three");
247 /// cmpstrs.clear();
248 ///
249 /// assert!(cmpstrs.is_empty());
250 /// ```
251 pub fn clear(&mut self) {
252 self.0.clear();
253 }
254
255 /// Shrinks the capacity of the data vector, which stores the bytes of the held strings, as much as possible.
256 ///
257 /// It will drop down as close as possible to the length but the allocator
258 /// may still inform the vector that there is space for a few more elements.
259 ///
260 /// # Examples
261 /// ```
262 /// # use compact_strings::CompactStrings;
263 /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
264 ///
265 /// cmpstrs.push("One");
266 /// cmpstrs.push("Two");
267 /// cmpstrs.push("Three");
268 ///
269 /// assert!(cmpstrs.capacity() >= 20);
270 /// cmpstrs.shrink_to_fit();
271 /// assert!(cmpstrs.capacity() >= 3);
272 /// ```
273 #[inline]
274 pub fn shrink_to_fit(&mut self) {
275 self.0.shrink_to_fit();
276 }
277
278 /// Shrinks the capacity of the info vector, which stores the starting indices and lengths of
279 /// the held strings, as much as possible.
280 ///
281 /// It will drop down as close as possible to the length but the allocator
282 /// may still inform the vector that there is space for a few more elements.
283 ///
284 /// # Examples
285 /// ```
286 /// # use compact_strings::CompactStrings;
287 /// let mut cmpstrs = CompactStrings::with_capacity(20, 10);
288 ///
289 /// cmpstrs.push("One");
290 /// cmpstrs.push("Two");
291 /// cmpstrs.push("Three");
292 ///
293 /// assert!(cmpstrs.capacity_meta() >= 10);
294 /// cmpstrs.shrink_to_fit();
295 /// assert!(cmpstrs.capacity_meta() >= 3);
296 /// ```
297 #[inline]
298 pub fn shrink_meta_to_fit(&mut self) {
299 self.0.shrink_meta_to_fit();
300 }
301
302 /// Shrinks the capacity of the data vector, which stores the bytes of the held strings, with a lower bound.
303 ///
304 /// The capacity will remain at least as large as both the length and the supplied value.
305 ///
306 /// If the current capacity is less than the lower limit, this is a no-op.
307 ///
308 /// # Examples
309 /// ```
310 /// # use compact_strings::CompactStrings;
311 /// let mut cmpstrs = CompactStrings::with_capacity(20, 4);
312 ///
313 /// cmpstrs.push("One");
314 /// cmpstrs.push("Two");
315 /// cmpstrs.push("Three");
316 ///
317 /// assert!(cmpstrs.capacity() >= 20);
318 /// cmpstrs.shrink_to(4);
319 /// assert!(cmpstrs.capacity() >= 4);
320 /// ```
321 #[inline]
322 pub fn shrink_to(&mut self, min_capacity: usize) {
323 self.0.shrink_to(min_capacity);
324 }
325
326 /// Shrinks the capacity of the meta vector, which starting indices and lengths of the held strings,
327 /// with a lower bound.
328 ///
329 /// The capacity will remain at least as large as both the length and the supplied value.
330 ///
331 /// If the current capacity is less than the lower limit, this is a no-op.
332 ///
333 /// # Examples
334 /// ```
335 /// # use compact_strings::CompactStrings;
336 /// let mut cmpstrs = CompactStrings::with_capacity(20, 10);
337 ///
338 /// cmpstrs.push("One");
339 /// cmpstrs.push("Two");
340 /// cmpstrs.push("Three");
341 ///
342 /// assert!(cmpstrs.capacity_meta() >= 10);
343 /// cmpstrs.shrink_meta_to(4);
344 /// assert!(cmpstrs.capacity_meta() >= 4);
345 /// ```
346 #[inline]
347 pub fn shrink_meta_to(&mut self, min_capacity: usize) {
348 self.0.shrink_meta_to(min_capacity);
349 }
350
351 /// Removes the data pointing to where the string at the specified index is stored.
352 ///
353 /// Note: This does not remove the bytes of the string from memory, you may want to use
354 /// [`remove`] if you desire that behavior.
355 ///
356 /// Note: Because this shifts over the remaining elements in the meta vector, it has a
357 /// worst-case performance of *O*(*n*).
358 ///
359 /// [`remove`]: CompactStrings::remove
360 ///
361 /// # Examples
362 /// ```
363 /// # use compact_strings::CompactStrings;
364 /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
365 ///
366 /// cmpstrs.push("One");
367 /// cmpstrs.push("Two");
368 /// cmpstrs.push("Three");
369 ///
370 /// cmpstrs.ignore(1);
371 ///
372 /// assert_eq!(cmpstrs.get(0), Some("One"));
373 /// assert_eq!(cmpstrs.get(1), Some("Three"));
374 /// assert_eq!(cmpstrs.get(2), None);
375 /// ```
376 pub fn ignore(&mut self, index: usize) {
377 self.0.ignore(index);
378 }
379
380 /// Removes the bytes of the string and data pointing to the string is stored.
381 ///
382 /// Note: This does not shrink the vectors where the bytes of the string and data to the string
383 /// are stored. You may shrink the data vector with [`shrink_to`] and [`shrink_to_fit`] and the
384 /// meta vector with [`shrink_meta_to`] and [`shrink_meta_to_fit`].
385 ///
386 /// Note: Because this shifts over the remaining elements in both data and meta vectors, it
387 /// has a worst-case performance of *O*(*n*). If you don't need the bytes of the string to
388 /// be removed, use [`ignore`] instead.
389 ///
390 /// [`shrink_to`]: CompactStrings::shrink_to
391 /// [`shrink_to_fit`]: CompactStrings::shrink_to_fit
392 /// [`shrink_meta_to`]: CompactStrings::shrink_meta_to
393 /// [`shrink_meta_to_fit`]: CompactStrings::shrink_meta_to_fit
394 /// [`ignore`]: CompactStrings::ignore
395 ///
396 /// # Examples
397 /// ```
398 /// # use compact_strings::CompactStrings;
399 /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
400 ///
401 /// cmpstrs.push("One");
402 /// cmpstrs.push("Two");
403 /// cmpstrs.push("Three");
404 ///
405 /// cmpstrs.remove(1);
406 ///
407 /// assert_eq!(cmpstrs.get(0), Some("One"));
408 /// assert_eq!(cmpstrs.get(1), Some("Three"));
409 /// assert_eq!(cmpstrs.get(2), None);
410 /// ```
411 pub fn remove(&mut self, index: usize) {
412 self.0.remove(index);
413 }
414
415 /// Returns an iterator over the slice.
416 ///
417 /// The iterator yields all items from start to end.
418 ///
419 /// # Examples
420 ///
421 /// ```
422 /// # use compact_strings::CompactStrings;
423 /// let mut cmpstrs = CompactStrings::with_capacity(20, 3);
424 /// cmpstrs.push("One");
425 /// cmpstrs.push("Two");
426 /// cmpstrs.push("Three");
427 /// let mut iterator = cmpstrs.iter();
428 ///
429 /// assert_eq!(iterator.next(), Some("One"));
430 /// assert_eq!(iterator.next(), Some("Two"));
431 /// assert_eq!(iterator.next(), Some("Three"));
432 /// assert_eq!(iterator.next(), None);
433 /// ```
434 #[inline]
435 #[must_use]
436 pub fn iter(&self) -> Iter<'_> {
437 Iter(self.0.iter())
438 }
439}
440
441impl PartialEq for CompactStrings {
442 fn eq(&self, other: &Self) -> bool {
443 let len = self.len();
444 if len != other.len() {
445 return false;
446 }
447
448 for idx in 0..len {
449 if self[idx] != other[idx] {
450 return false;
451 }
452 }
453
454 true
455 }
456}
457
458impl Debug for CompactStrings {
459 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
460 f.debug_list().entries(self.iter()).finish()
461 }
462}
463
464impl<S> Extend<S> for CompactStrings
465where
466 S: Deref<Target = str>,
467{
468 #[inline]
469 fn extend<I: IntoIterator<Item = S>>(&mut self, iter: I) {
470 for s in iter {
471 self.push(s);
472 }
473 }
474}
475
476impl Index<usize> for CompactStrings {
477 type Output = str;
478
479 #[inline]
480 fn index(&self, index: usize) -> &Self::Output {
481 self.get(index).unwrap()
482 }
483}
484
485/// Iterator over strings in a [`CompactStrings`]
486///
487/// # Examples
488/// ```
489/// # use compact_strings::CompactStrings;
490/// let mut cmpstrs = CompactStrings::new();
491/// cmpstrs.push("One");
492/// cmpstrs.push("Two");
493/// cmpstrs.push("Three");
494///
495/// let mut iter = cmpstrs.into_iter();
496/// assert_eq!(iter.next(), Some("One"));
497/// assert_eq!(iter.next(), Some("Two"));
498/// assert_eq!(iter.next(), Some("Three"));
499/// assert_eq!(iter.next(), None);
500/// ```
501pub struct Iter<'a>(crate::compact_bytestrings::Iter<'a>);
502
503impl<'a> Iter<'a> {
504 pub fn new(inner: &'a CompactStrings) -> Self {
505 Self(inner.0.iter())
506 }
507
508 fn from_utf8_maybe_checked(bytes: &[u8]) -> Option<&str> {
509 if cfg!(feature = "no_unsafe") {
510 core::str::from_utf8(bytes).ok()
511 } else {
512 Some(unsafe { core::str::from_utf8_unchecked(bytes) })
513 }
514 }
515}
516
517impl<'a> Iterator for Iter<'a> {
518 type Item = &'a str;
519
520 fn next(&mut self) -> Option<Self::Item> {
521 self.0.next().and_then(Self::from_utf8_maybe_checked)
522 }
523
524 fn nth(&mut self, n: usize) -> Option<Self::Item> {
525 self.0.nth(n).and_then(Self::from_utf8_maybe_checked)
526 }
527
528 #[inline]
529 fn count(self) -> usize
530 where
531 Self: Sized,
532 {
533 self.len()
534 }
535
536 #[inline]
537 fn last(mut self) -> Option<Self::Item>
538 where
539 Self: Sized,
540 {
541 self.next_back()
542 }
543
544 #[inline]
545 fn size_hint(&self) -> (usize, Option<usize>) {
546 self.0.size_hint()
547 }
548}
549
550impl<'a> DoubleEndedIterator for Iter<'a> {
551 fn next_back(&mut self) -> Option<Self::Item> {
552 self.0.next_back().and_then(Self::from_utf8_maybe_checked)
553 }
554
555 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
556 self.0.nth_back(n).and_then(Self::from_utf8_maybe_checked)
557 }
558}
559
560impl ExactSizeIterator for Iter<'_> {
561 #[inline]
562 fn len(&self) -> usize {
563 self.0.len()
564 }
565}
566
567impl<'a> IntoIterator for &'a CompactStrings {
568 type Item = &'a str;
569
570 type IntoIter = Iter<'a>;
571
572 #[inline]
573 fn into_iter(self) -> Self::IntoIter {
574 self.iter()
575 }
576}
577
578impl<S> FromIterator<S> for CompactStrings
579where
580 S: Deref<Target = str>,
581{
582 fn from_iter<I: IntoIterator<Item = S>>(iter: I) -> Self {
583 let iter = iter.into_iter();
584 let meta_capacity = match iter.size_hint() {
585 (a, Some(b)) if a == b => a,
586 _ => 0,
587 };
588
589 let mut out = CompactStrings::with_capacity(0, meta_capacity);
590 for s in iter {
591 out.push(s);
592 }
593
594 out
595 }
596}
597
598impl<S, I> From<I> for CompactStrings
599where
600 S: Deref<Target = str>,
601 I: IntoIterator<Item = S>,
602{
603 #[inline]
604 fn from(value: I) -> Self {
605 FromIterator::from_iter(value)
606 }
607}
608
609impl TryFrom<CompactBytestrings> for CompactStrings {
610 type Error = core::str::Utf8Error;
611
612 fn try_from(value: CompactBytestrings) -> Result<Self, Self::Error> {
613 for bstr in &value {
614 let _ = core::str::from_utf8(bstr)?;
615 }
616
617 Ok(Self(value))
618 }
619}
620
#[cfg(test)]
mod tests {
    use crate::CompactStrings;

    /// `len` on the iterator drops by one per yielded string and stays at zero once exhausted.
    #[test]
    fn exact_size_iterator() {
        let mut strings = CompactStrings::new();
        for word in &["One", "Two", "Three"] {
            strings.push(*word);
        }

        let mut remaining_strings = strings.iter();
        for expected_len in (1..=3).rev() {
            assert_eq!(remaining_strings.len(), expected_len);
            let _ = remaining_strings.next();
        }

        // Exhausted: the length stays at zero even after another `next` call.
        assert_eq!(remaining_strings.len(), 0);
        let _ = remaining_strings.next();
        assert_eq!(remaining_strings.len(), 0);
    }

    /// Alternating `next` and `next_back` meet in the middle without yielding a string twice.
    #[test]
    fn double_ended_iterator() {
        let mut strings = CompactStrings::new();
        for word in &["One", "Two", "Three", "Four"] {
            strings.push(*word);
        }

        let mut both_ends = strings.iter();
        assert_eq!(both_ends.next(), Some("One"));
        assert_eq!(both_ends.next_back(), Some("Four"));
        assert_eq!(both_ends.next(), Some("Two"));
        assert_eq!(both_ends.next_back(), Some("Three"));
        assert_eq!(both_ends.next(), None);
        assert_eq!(both_ends.next_back(), None);
    }
}
663
#[cfg(feature = "serde")]
mod serde {
    use core::fmt;

    use serde::{
        de::{DeserializeSeed, SeqAccess, Visitor},
        Deserialize, Deserializer, Serialize,
    };

    use crate::CompactStrings;

    /// Upper bound on the number of meta entries preallocated from a deserializer's size hint.
    ///
    /// The hint may come straight from untrusted input, so it must not be able to force an
    /// arbitrarily large allocation before any element has been read.
    const MAX_PREALLOCATED_ENTRIES: usize = 4096;

    impl Serialize for CompactStrings {
        /// Serializes the collection as a sequence of strings.
        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            serializer.collect_seq(self)
        }
    }

    impl<'de> Deserialize<'de> for CompactStrings {
        /// Deserializes the collection from a sequence of strings.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            deserializer.deserialize_seq(CompactStringsVisitor)
        }
    }

    /// Visitor that collects a sequence of strings into a [`CompactStrings`].
    struct CompactStringsVisitor;

    impl<'de> Visitor<'de> for CompactStringsVisitor {
        type Value = CompactStrings;

        fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
            formatter.write_str("an array of strings")
        }

        #[inline]
        fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
        where
            A: SeqAccess<'de>,
        {
            let meta_capacity = seq
                .size_hint()
                .unwrap_or_default()
                .min(MAX_PREALLOCATED_ENTRIES);
            let mut out = CompactStrings::with_capacity(0, meta_capacity);

            // Each element is pushed straight into `out` by the seed. Unlike
            // `next_element::<&str>()`, this also accepts strings the deserializer cannot lend
            // out of its input (escaped strings, reader-based input), and needs no allocation.
            while seq.next_element_seed(PushString(&mut out))?.is_some() {}

            Ok(out)
        }
    }

    /// Seed and visitor that appends one deserialized string to a [`CompactStrings`].
    struct PushString<'a>(&'a mut CompactStrings);

    impl<'de> DeserializeSeed<'de> for PushString<'_> {
        type Value = ();

        fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
        where
            D: Deserializer<'de>,
        {
            deserializer.deserialize_str(self)
        }
    }

    impl<'de> Visitor<'de> for PushString<'_> {
        type Value = ();

        fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
            formatter.write_str("a string")
        }

        // `visit_borrowed_str` and `visit_string` forward here by default, so borrowed,
        // transient and owned strings are all handled by this one method.
        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            self.0.push(value);
            Ok(())
        }
    }
}
711
712#[cfg(feature = "serde")]
713#[cfg_attr(feature = "serde", allow(unused_imports))]
714#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
715pub use self::serde::*;