string_ring/
lib.rs

1#![no_std]
2#![forbid(unsafe_code)]
3
4#![doc = include_str!("../README.md")]
5
6extern crate no_std_compat as std;
7
8use std::prelude::v1::*;
9use std::collections::VecDeque;
10
11use memchr::memchr;
12
13fn ceil_char_boundary_offset<I: Iterator<Item = u8>>(mut src: I) -> usize {
14    for i in 0..4 {
15        match src.next() {
16            None => {
17                debug_assert_eq!(i, 0);
18                return 0;
19            }
20            Some(b) => if (b as i8) >= -0x40 { // equivalent to: b < 128 || b >= 192 (copied from u8::is_utf8_char_boundary)
21                return i;
22            }
23        }
24    }
25    unreachable!()
26}
27#[test]
28fn test_ceil_char_boundary_offset() {
29    assert_eq!(ceil_char_boundary_offset("hello".as_bytes().iter().copied().skip(0)), 0);
30    assert_eq!(ceil_char_boundary_offset("hello".as_bytes().iter().copied().skip(1)), 0);
31    assert_eq!(ceil_char_boundary_offset("hello".as_bytes().iter().copied().skip(2)), 0);
32    assert_eq!(ceil_char_boundary_offset("hello".as_bytes().iter().copied().skip(3)), 0);
33    assert_eq!(ceil_char_boundary_offset("hello".as_bytes().iter().copied().skip(4)), 0);
34    assert_eq!(ceil_char_boundary_offset("hello".as_bytes().iter().copied().skip(5)), 0);
35
36    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(0)), 0);
37    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(1)), 2);
38    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(2)), 1);
39    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(3)), 0);
40    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(4)), 0);
41    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(5)), 0);
42    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(6)), 0);
43    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(7)), 2);
44    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(8)), 1);
45    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(9)), 0);
46    assert_eq!(ceil_char_boundary_offset("한eel들o".as_bytes().iter().copied().skip(10)), 0);
47}
48
49/// The level of precision used for removing old content from a [`StringRing`].
50pub enum Granularity {
51    /// Remove as few characters as possible (may result in partial lines at the beginning).
52    Character,
53    /// Remove entire lines at a time, but always as few lines as possible.
54    /// 
55    /// In this mode, if a removed line does not end with a new line character, all content will be deleted
56    /// and future input will be discarded until a new line character is encountered.
57    /// This is to guarantee that pushing `a` followed by `b` is equivalent to pushing `a + b`.
58    Line,
59}
60
61/// A circular string buffer with a set maximum size.
62/// 
63/// Strings can be pushed onto the end of the buffer.
64/// If the maximum size would be exceeded, the oldest content will be removed to make space.
65/// The specific strategy for removing old content is defined by [`Granularity`].
66pub struct StringRing {
67    content: VecDeque<u8>,
68    max_size: usize,
69    granularity: Granularity,
70    discarding: bool,
71}
72impl StringRing {
73    /// Creates a new, empty [`StringRing`] with the given settings.
74    pub fn new(max_size: usize, granularity: Granularity) -> Self {
75        Self {
76            granularity, max_size,
77            content: Default::default(),
78            discarding: false,
79        }
80    }
81
82    /// Gets the current size of the stored string.
83    pub fn len(&self) -> usize {
84        self.content.len()
85    }
86    /// Checks if the buffer is empty.
87    pub fn is_empty(&self) -> bool {
88        self.content.is_empty()
89    }
90    /// Removes all content from the buffer and resets it to the initial state.
91    pub fn clear(&mut self) {
92        self.content.clear();
93        self.discarding = false;
94    }
95
96    /// Gets a reference to the raw stored data.
97    /// Note that because this is a circular buffer of bytes, the content may be in two distinct pieces which may not independently be valid UTF-8 sequences.
98    /// However, the two slices together (in the returned order) always represents a valid UTF-8 string.
99    pub fn as_slices(&self) -> (&[u8], &[u8]) {
100        self.content.as_slices()
101    }
102    /// Makes the content of the circular buffer contiguous and returns it as a mutable slice.
103    pub fn make_contiguous(&mut self) -> &mut str {
104        std::str::from_utf8_mut(self.content.make_contiguous()).unwrap()
105    }
106
107    /// Appends content to the end of the buffer.
108    /// If this would cause the buffer to exceed its maximum size, old content is removed to make room (as defined by [`Granularity`]).
109    /// 
110    /// It is guaranteed that pushing `a` followed by `b` is equivalent to pushing `a + b`.
111    /// In [`Granularity::Line`] mode, this may lead to unexpected results if `a` does not end in a new line,
112    /// as bytes may need to be discarded from the beginning of `b` to reach the end of a line being removed.
113    /// This information is stored internally as a simple state machine to ensure stream consistency.
114    pub fn push(&mut self, mut s: &str) {
115        loop { // performs at most 3 iterations
116            if self.discarding {
117                debug_assert!(self.content.is_empty());
118                match self.granularity {
119                    Granularity::Character => unreachable!(),
120                    Granularity::Line => match memchr(b'\n', s.as_bytes()) {
121                        Some(x) => {
122                            self.discarding = false;
123                            s = &s[x + 1..];
124                        }
125                        None => return,
126                    }
127                }
128            }
129            debug_assert!(!self.discarding);
130
131            let quota = (self.content.len() + s.len()).saturating_sub(self.max_size);
132            if quota == 0 {
133                self.content.extend(s.as_bytes().iter().copied());
134                return;
135            }
136
137            if !self.content.is_empty() {
138                if quota >= self.content.len() {
139                    match self.granularity {
140                        Granularity::Character => (),
141                        Granularity::Line => self.discarding = *self.content.back().unwrap() != b'\n',
142                    }
143                    self.content.clear();
144                } else {
145                    let (a, b) = self.content.as_slices();
146                    let delta = ceil_char_boundary_offset(a.iter().chain(b).copied().skip(quota));
147                    let last_removed = self.content[quota + delta - 1];
148                    self.content.drain(..quota + delta);
149                    match self.granularity {
150                        Granularity::Character => (),
151                        Granularity::Line => if last_removed != b'\n' {
152                            let (a, b) = self.content.as_slices();
153                            match memchr(b'\n', a) {
154                                Some(x) => { self.content.drain(..x + 1); }
155                                None => match memchr(b'\n', b) {
156                                    Some(x) => { self.content.drain(..a.len() + x + 1); }
157                                    None => {
158                                        self.discarding = true;
159                                        self.content.clear();
160                                    }
161                                }
162                            }
163                        }
164                    }
165                }
166            } else {
167                let delta = ceil_char_boundary_offset(s.as_bytes()[quota..].iter().copied());
168                let last_removed = s.as_bytes()[quota + delta - 1];
169                s = &s[quota + delta..];
170                match self.granularity {
171                    Granularity::Character => (),
172                    Granularity::Line => self.discarding = last_removed != b'\n',
173                }
174            }
175        }
176    }
177}
178#[test]
179fn test_string_ring_char() {
180    let mut buf = StringRing::new(18, Granularity::Character);
181    assert_eq!(buf.make_contiguous(), "");
182    buf.push("hello world");
183    assert_eq!(buf.make_contiguous(), "hello world");
184    buf.push("this is a test");
185    assert_eq!(buf.make_contiguous(), "orldthis is a test");
186    buf.push("this is a really long string that will go over the buffer limit");
187    assert_eq!(buf.make_contiguous(), "r the buffer limit");
188    buf.push("한국");
189    assert_eq!(buf.make_contiguous(), "buffer limit한국");
190    buf.push("more content");
191    assert_eq!(buf.make_contiguous(), "한국more content");
192    buf.push("a");
193    assert_eq!(buf.make_contiguous(), "국more contenta");
194    buf.push("b");
195    assert_eq!(buf.make_contiguous(), "국more contentab");
196    buf.push("c");
197    assert_eq!(buf.make_contiguous(), "국more contentabc");
198    buf.push("def");
199    assert_eq!(buf.make_contiguous(), "more contentabcdef");
200}
201#[test]
202fn test_string_ring_line() {
203    let mut buf = StringRing::new(21, Granularity::Line);
204    assert_eq!(buf.make_contiguous(), "");
205    buf.push("hello world\n");
206    assert_eq!(buf.make_contiguous(), "hello world\n");
207    buf.push("this is a test\n");
208    assert_eq!(buf.make_contiguous(), "this is a test\n");
209    buf.push("small\n");
210    assert_eq!(buf.make_contiguous(), "this is a test\nsmall\n");
211    buf.push("a\n");
212    assert_eq!(buf.make_contiguous(), "small\na\n");
213    buf.push("this is a really long line that will go over the limit\n");
214    assert_eq!(buf.make_contiguous(), "");
215    buf.push("another test");
216    assert_eq!(buf.make_contiguous(), "another test");
217    buf.push("bananasyo");
218    assert_eq!(buf.make_contiguous(), "another testbananasyo");
219    buf.push("x");
220    assert_eq!(buf.make_contiguous(), "");
221    buf.push("more content");
222    assert_eq!(buf.make_contiguous(), "");
223    buf.push("even more content\nand a new line\n");
224    assert_eq!(buf.make_contiguous(), "and a new line\n");
225}