char_circle/
lib.rs

1//! A circular buffer for strings and traits for in-place string transforms.
2//!
3//! This crate provides two key types: the [`CharCircle`] struct and the
4//! [`StringTransform`] trait. The `CharCircle` is a circular buffer
5//! specialized for UTF-8 strings, and the `StringTransform` trait builds upon
6//! it to provide a character-oriented API for in-place string transformations.
7//! In short, `StringTransform` allows you to implement transformations as
8//! iterator adaptors, with copy-on-write optimizations in mind.
9//!
10//! The `CharCircle` uses internal mutability. This enables its contents to be
11//! consumed by an external iterator, [`Chars`]. As a consequence, the
12//! `CharCircle` is not `Sync`. It is implemented as a `RefCell` around the
13//! [`RawCircle`], which has a nearly identical API, uses external mutability,
14//! is thread-safe, and does not provide a consuming iterator.
15//!
16//! The `StringTransform` trait is implemented by factories of iterator
17//! adaptors. For simple cases, the [`SimpleTransform`] trait provides an
18//! alternative that is implemented directly by the adaptor.
19//!
20//!
21//! Example: To Uppercase
22//! ---------------------------------------------------------------------------
23//!
24//! Transforms which don't require configuration are most easily implemented
25//! with `SimpleTransform`.
26//!
27//! Here we implement an uppercase transform:
28//!
29//! ```
30//! use char_circle::{SimpleTransform, Chars};
31//!
32//! // Step 1: Define the transform as an iterator adaptor.
33//! struct ToUpper<I>(I);
34//!
35//! impl<I> Iterator for ToUpper<I> where I: Iterator<Item=char> {
36//!     type Item = char;
37//!     fn next(&mut self) -> Option<char> {
38//!         self.0.next().map(|ch| ch.to_ascii_uppercase())
39//!     }
40//! }
41//!
42//! // Step 2: Define a constructor for the adaptor with `SimpleTransform`.
43//! impl<'a> SimpleTransform<'a> for ToUpper<Chars<'a>> {
44//!     fn transform_chars(chars: Chars<'a>) -> Self {
45//!         ToUpper(chars)
46//!     }
47//! }
48//!
49//! // Step 3: Profit!
50//! let s = "can you hear me in the back?";
51//! let s = ToUpper::transform(s);
52//! assert_eq!(&s, "CAN YOU HEAR ME IN THE BACK?");
53//! ```
54//!
55//!
56//! Example: Caesar Cipher
57//! ---------------------------------------------------------------------------
58//!
59//! Transforms that need to be configured should define a factory which
60//! implements `StringTransform`.
61//!
62//! Here we implement a Caesar cipher configured with its key:
63//!
64//! ```
65//! use char_circle::{StringTransform, Chars};
66//!
67//! // Step 1: Define the transform as an iterator adaptor.
68//! struct CaesarCipherIter<I> {
69//!     inner: I,
70//!     key: i32,
71//! }
72//!
73//! impl<I> Iterator for CaesarCipherIter<I> where I: Iterator<Item=char> {
74//!     type Item = char;
75//!     fn next(&mut self) -> Option<char> {
76//!         let plaintext = self.inner.next()?;
77//!         let ciphertext = plaintext as i32 + self.key;
78//!         let ciphertext = std::char::from_u32(ciphertext as u32).unwrap();
79//!         Some(ciphertext)
80//!     }
81//! }
82//!
83//! // Step 2: Define a factory for the adaptor with `StringTransform`.
84//! struct CaesarCipher(i32);
85//!
86//! impl<'a> StringTransform<'a> for CaesarCipher {
87//!     type Iter = CaesarCipherIter<Chars<'a>>;
88//!     fn transform_chars(&self, chars: Chars<'a>) -> Self::Iter {
89//!         CaesarCipherIter { inner: chars, key: self.0 }
90//!     }
91//! }
92//!
93//! // Step 3: Profit!
94//! let encoder = CaesarCipher(8);
95//! let decoder = CaesarCipher(-8);
96//! let plaintext = "Veni, vidi, vici";
97//! let ciphertext = encoder.transform(plaintext);
98//! assert_eq!(&ciphertext, "^mvq4(~qlq4(~qkq");
99//! let plaintext = decoder.transform(ciphertext);
100//! assert_eq!(&plaintext, "Veni, vidi, vici");
101//! ```
102
103use std::borrow::Cow;
104use std::cell::RefCell;
105use std::char;
106use std::cmp;
107use std::io::{self, Read, Write};
108use std::marker::PhantomData;
109use std::mem;
110use std::ptr;
111
112
113// RawCircle
114// ---------------------------------------------------------------------------
115
116/// A thread-safe version of [`CharCircle`].
117///
118/// The API of this buffer is almost identical to `CharCircle` except that it
119/// uses external mutability, and it does not provide a means to consume its
120/// characters from an external iterator.
121#[derive(Debug, Default, Clone)]
122pub struct RawCircle {
123    buf: Vec<u8>,    // The backing storage.
124    len: usize,      // The number of used bytes in the buffer.
125    n_chars: usize,  // The number of characters in the buffer.
126    read: usize,     // Index of the read head. May equal the capacity.
127    write: usize,    // Index of the write head. May equal the capacity.
128}
129
130impl RawCircle {
131    /// Construct a new `RawCircle` using a string as the initial buffer.
132    pub fn new(s: String) -> RawCircle {
133        let n_chars = s.chars().count();
134        let buf = s.into_bytes();
135        let len = buf.len();
136        RawCircle { buf, len, n_chars, read: 0, write: 0 }
137    }
138
139    /// Construct a new, empty `RawCircle`.
140    pub fn empty() -> RawCircle {
141        RawCircle::default()
142    }
143
144    /// The number of UTF-8 bytes in the buffer.
145    pub fn len(&self) -> usize {
146        self.len
147    }
148
149    /// The number of characters in the buffer.
150    pub fn n_chars(&self) -> usize {
151        self.n_chars
152    }
153
154    /// The number of bytes the buffer can hold before reallocating.
155    ///
156    /// This refers to the length of the backing vector. That vector may have
157    /// additional capacity allocated to it that is not reported by this method.
158    pub fn capacity(&self) -> usize {
159        self.buf.len()
160    }
161
162    /// Reallocate the buffer if it is full.
163    fn grow_if_full(&mut self) {
164        // SAFTEY: There are several safety concerns here.
165        //
166        // 1. The read and write heads may equal the capacity when this method is
167        //    called (but may not otherwise be out of bounds). We move the read head
168        //    to 0 if it is at the capacity, and we prove the write head is in bounds
169        //    before we use it.
170        //
171        // 2. We move data around using `ptr::copy`. We must maintain that the affected
172        //    regions are in bounds and that the destination is trash bytes.
173        //
174        // 3. We must properly maintain the read and write heads. When we return, the
175        //    [BACK] and [FRONT] areas must be valid UTF-8, and the read head may equal
176        //    the capacity. The length should never change.
177        //
178        // It is safe to directly set the length of the buffer to the capacity. It is
179        // in-bounds and there are no initialization or drop-safety concerns with
180        // plain-old bytes.
181        unsafe {
182            let old_cap = self.capacity();
183            if self.read == old_cap { self.read = 0 };
184            if self.read == 0 { self.write = self.len };
185
186            if old_cap == 0 {
187                // We have no capacity, so add some.
188                // Only hit the allocator if the backing vector has no extra capacity.
189                debug_assert!(self.read == 0);
190                debug_assert!(self.write == 0);
191                if self.buf.capacity() == 0 { self.buf.reserve(1); }
192                let new_cap = self.buf.capacity();
193                self.buf.set_len(new_cap);
194
195            } else if self.len == old_cap {
196                // If the backing vector has excess capacity, we just use that.
197                // Otherwise we ask the allocator to double our capacity.
198                if old_cap == self.buf.capacity() {
199                    self.buf.reserve(old_cap);
200                }
201                let new_cap = self.buf.capacity();
202                self.buf.set_len(new_cap);
203
204                if self.write == self.read {
205                    // The memory areas are [BACK, FRONT, TMP].
206                    // The read and write heads are between [BACK] and [FRONT].
207                    // Rearange: [BACK, FRONT, TMP] -> [BACK, TMP, FRONT].
208                    let front_size = old_cap - self.read;
209                    let new_read = new_cap - front_size;
210                    let src = self.buf.get_unchecked_mut(self.read) as *mut u8;
211                    let dest = self.buf.get_unchecked_mut(new_read) as *mut u8;
212                    ptr::copy(src, dest, front_size);
213                    self.read = new_read;
214                } else {
215                    debug_assert!(self.read == 0);
216                    debug_assert!(self.write == self.len);
217                }
218            }
219        }
220    }
221
222    /// Return the number of UTF-8 bytes of the next character.
223    fn peek_char_len(&self) -> Option<usize> {
224        // SAFETY: It is safe to use an unchecked get at the read head when the
225        // length is non-zero.
226        if self.len == 0 { return None };
227        let byte = unsafe { self.buf.get_unchecked(self.read) };
228        let reverse = byte ^ 0b11111111;
229        let leading_ones = reverse.leading_zeros();
230        match leading_ones {
231            0 => Some(1),
232            1 => unreachable!("invalid utf-8"),
233            2 => Some(2),
234            3 => Some(3),
235            4 => Some(4),
236            _ => unreachable!("invalid utf-8"),
237        }
238    }
239
240    /// Read a single byte from the buffer.
241    ///
242    /// It is unsafe to partially read a multibyte UTF-8 character.
243    ///
244    /// This method DOES update `len` but DOES NOT update `n_chars`.
245    /// Callers MUST update `n_chars`.
246    unsafe fn read_byte(&mut self) -> Option<u8> {
247        // SAFTEY: The read head may equal the capacity,
248        // but may not otherwise be out of bounds.
249        if self.len == 0 { return None };
250        if self.read == self.capacity() { self.read = 0 };
251        let byte = self.buf.get_unchecked(self.read);
252        self.read += 1;
253        self.len -= 1;
254        Some(*byte)
255    }
256
257    /// Read the next character in the buffer.
258    pub fn read_char(&mut self) -> Option<char> {
259        // SAFETY: We MUST read an entire character, and we MUST update `n_chars`.
260        unsafe {
261            let byte = self.read_byte()?;
262            let reverse = byte ^ 0b11111111;
263            let leading_ones = reverse.leading_zeros();
264            let mask = 0b11111111 >> leading_ones;
265            let mut ch = (byte & mask) as u32;
266            let n_additional_bytes = leading_ones.saturating_sub(1);
267            for _ in 0..n_additional_bytes {
268                let byte = self.read_byte().unwrap();
269                let byte = byte & 0b00111111;
270                ch = (ch << 6) | byte as u32;
271            }
272            self.n_chars -= 1;
273            Some(char::from_u32_unchecked(ch))
274        }
275    }
276
277    /// Read bytes from this circle into a buffer.
278    ///
279    /// This method will only ever read complete UTF-8 characters. It returns the
280    /// number of bytes read; it never returns an error.
281    ///
282    /// This is the implementation of [`std::io::Read`] for `RawCircle`.
283    pub fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
284        let max_len = cmp::min(buf.len(), self.len());
285        let mut len = 0;
286        loop {
287            match self.peek_char_len() {
288                None => return Ok(len),
289                Some(n) => {
290                    if len + n <= max_len {
291                        // SAFETY: We MUST read an entire character, and we MUST update `n_chars`.
292                        for i in 0..n {
293                            buf[len+i] = unsafe { self.read_byte().unwrap() };
294                        }
295                        self.n_chars -= 1;
296                        len += n;
297                    } else {
298                        return Ok(len);
299                    }
300                }
301            }
302        }
303    }
304
305    /// Read bytes from this circle into a buffer.
306    ///
307    /// This method is equivalent to [`RawCircle::read`] except the return value
308    /// is the buffer cast to a `&str`.
309    pub fn read_str<'a>(&mut self, buf: &'a mut [u8]) -> &'a str {
310        // SAFETY: It is safe to cast to a string because `self.read` only ever reads
311        // valid UTF-8.
312        let n = self.read(buf).unwrap();
313        let str = &buf[..n] as *const [u8] as *const str;
314        return unsafe { &*str }
315    }
316
317    /// Write a single byte into the buffer.
318    ///
319    /// It is unsafe to write invalid UTF-8.
320    ///
321    /// This method DOES update `len` but DOES NOT update `n_chars`.
322    /// Callers MUST update `n_chars`.
323    unsafe fn write_byte(&mut self, byte: u8) {
324        // SAFTEY: The write head may equal the capacity,
325        // but may not otherwise be out of bounds.
326        self.grow_if_full();
327        if self.write == self.capacity() { self.write = 0 };
328        let byte_ref = self.buf.get_unchecked_mut(self.write);
329        *byte_ref = byte;
330        self.write += 1;
331        self.len += 1;
332    }
333
334    /// Write a character into the buffer.
335    pub fn write_char(&mut self, ch: char) {
336        // SAFETY: We MUST write an entire character, and we MUST update `n_chars`.
337        unsafe {
338            let mut utf8 = [0u8; 4];
339            for byte in ch.encode_utf8(&mut utf8).as_bytes() {
340                self.write_byte(*byte)
341            }
342            self.n_chars += 1;
343        }
344    }
345
346    /// Read bytes from a string into this buffer;
347    ///
348    /// This method will only ever write complete UTF-8 characters. It returns the
349    /// number of bytes written. This method returns an error if the input is not
350    /// valid UTF-8.
351    ///
352    /// This is the implementation of [`std::io::Write`] for `RawCircle`.
353    pub fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
354        let mut len = 0;
355        let mut tmp = [0u8; 4];
356        loop {
357            // SAFETY: We must make sure the read is in-bounds to use `get_unchecked`.
358            if len == buf.len() { return Ok(len) };
359            let first_byte = unsafe { *buf.get_unchecked(len) };
360            let reverse = first_byte ^ 0b11111111;
361            let leading_ones = reverse.leading_zeros();
362            let n = match leading_ones {
363                0 => 1,
364                1 => return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid UTF-8")),
365                2 => 2,
366                3 => 3,
367                4 => 4,
368                _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid UTF-8")),
369            };
370            tmp[0] = first_byte;
371            for i in 1..n {
372                // SAFETY: We must make sure the read is in-bounds to use `get_unchecked`.
373                if len + i == buf.len() { return Ok(len) };
374                let byte = unsafe { *buf.get_unchecked(len + i) };
375                let reverse = byte ^ 0b11111111;
376                let leading_ones = reverse.leading_zeros();
377                if leading_ones != 1 {
378                    return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid UTF-8"));
379                }
380                tmp[i] = byte;
381            }
382            // SAFETY: We MUST write an entire character, and we MUST update `n_chars`.
383            for i in 0..n {
384                unsafe { self.write_byte(tmp[i]) };
385            }
386            self.n_chars += 1;
387            len += n;
388        }
389    }
390
391    /// Read bytes from a string into this buffer;
392    ///
393    /// This method is equivalent to [`RawCircle::write`] except that it cannot return
394    /// an error because the input is valid UTF-8.
395    pub fn write_str(&mut self, buf: &str) -> usize {
396        self.write(buf.as_bytes()).unwrap()
397    }
398
399    /// Rearange the contents so that the read head is at 0.
400    fn realign(&mut self) {
401        // SAFTEY: There are several safety concerns here.
402        //
403        // 1. The read and write heads may equal the capacity when this method is
404        //    called (but may not otherwise be out of bounds). We move the read head
405        //    to 0 if it is at the capacity, and we prove the write head is in bounds
406        //    before we use it.
407        //
408        // 2. We move data around using `ptr::copy`, `ptr::copy_nonoverlapping`, and
409        //    `ptr::swap_nonoverlapping`. We must maintain that all affected regions
410        //    are in bounds and non-overlapping when required. We must also maintain
411        //    that the destination of copys is trash bytes.
412        //
413        // 3. We must properly maintain the read and write heads. When we return, the
414        //    read head should be at 0, and the write head should be at `self.len`.
415        //    The range `read..write` must be valid UTF-8, and the write head may
416        //    equal the capacity. The length should never change.
417        unsafe {
418            if self.read == self.capacity() { self.read = 0 };
419            if self.read == 0 { self.write = self.len };
420
421            if self.len == 0 || self.read == 0 {
422                // Nothing to do.
423
424            } else if self.read < self.write {
425                // The areas in memory are [LEFT, STR, RIGHT].
426                // [LEFT, STR, RIGHT] -> [STR, LEFT, RIGHT].
427                let src = self.buf.get_unchecked_mut(self.read) as *mut u8;
428                let dest = self.buf.get_unchecked_mut(0) as *mut u8;
429                ptr::copy(src, dest, self.len);
430
431            } else {
432                // SAFTEY: Note that the write head is in bounds in this case.
433                debug_assert!(self.write < self.read);
434
435                // The areas in memory are [BACK, TMP, FRONT]. [TMP] may be empty.
436                // We are trying to get to [FRONT, BACK, TMP] where [FRONT, BACK] is our string.
437                let mut back = (0, self.write);
438                let mut tmp = (self.write, self.read);
439                let mut front = (self.read, self.capacity());
440                let len = |bounds: (usize, usize)| bounds.1 - bounds.0;
441
442                // [BACK, TMP, FRONT] -> [BACK, FRONT, TMP]
443                let src = self.buf.get_unchecked_mut(front.0) as *mut u8;
444                let dest = self.buf.get_unchecked_mut(tmp.0) as *mut u8;
445                let count = len(front);
446                ptr::copy(src, dest, count);
447                front = (tmp.0, tmp.0 + count);
448                tmp = (front.1, self.capacity());
449
450                // Iterativly move parts of [FRONT] to the correct position.
451                // When we're done, we still have three areas [BACK, FRONT, TMP],
452                // but they do not include the bytes that are already in place.
453                while len(tmp) < len(back) {
454                    if len(front) < len(back) {
455                        // Say [BACK] = [B0, B1] where [B0] has the same size as [FRONT].
456                        // [B0, B1, FRONT, TMP] -> [FRONT, B1, B0, TMP]
457                        let count = len(front);
458                        let b0 = (back.0, back.0 + count);
459                        let b1 = (back.0 + count, back.1);
460                        let x = self.buf.get_unchecked_mut(b0.0) as *mut u8;
461                        let y = self.buf.get_unchecked_mut(front.0) as *mut u8;
462                        ptr::swap_nonoverlapping(x, y, count);
463                        back = b1;
464                    } else {
465                        // Say [FRONT] = [F0, F1] where [F0] has the same size as [BACK].
466                        // [BACK, F0, F1, TMP] -> [F0, BACK, F1, TMP]
467                        let count = len(back);
468                        let f0 = (front.0, front.0 + count);
469                        let f1 = (front.0 + count, front.1);
470                        let x = self.buf.get_unchecked_mut(f0.0) as *mut u8;
471                        let y = self.buf.get_unchecked_mut(back.0) as *mut u8;
472                        ptr::swap_nonoverlapping(x, y, count);
473                        back = f0;
474                        front = f1;
475                    }
476                }
477
478                if len(tmp) != 0 {
479                    // Say [TMP] = [T0, T1] where [T0] has the same size as [BACK].
480                    // [BACK, FRONT, T0, T1] -> [T0, FRONT, BACK, T1]
481                    let src = self.buf.get_unchecked_mut(back.0) as *mut u8;
482                    let dest = self.buf.get_unchecked_mut(tmp.0) as *mut u8;
483                    let count = len(back);
484                    ptr::copy_nonoverlapping(src, dest, count);
485                    let t0 = (back.0, back.0 + count);
486                    back = (tmp.0, tmp.0 + count);
487
488                    // [T0, FRONT, BACK, T1] -> [FRONT, BACK, T0, T1]
489                    let src = self.buf.get_unchecked_mut(front.0) as *mut u8;
490                    let dest = self.buf.get_unchecked_mut(t0.0) as *mut u8;
491                    let count = len(front) + len(back);
492                    ptr::copy(src, dest, count);
493                } else {
494                    // [BACK, FRONT, TMP] == [FRONT]
495                    debug_assert!(len(back) == 0);
496                }
497            }
498        }
499
500        // Fix the read and write heads.
501        self.read = 0;
502        self.write = self.len;
503    }
504
505    /// Unpack this circular buffer into a byte vector.
506    pub fn into_vec(mut self) -> Vec<u8> {
507        // SAFTEY: It is safe to directly set the length of the buffer to the length
508        // of the content. It is in-bounds and there are no drop-safety concerns with
509        // plain-old bytes. When the buffer has been realigned, the content will be
510        // on the range `0..len`.
511        self.realign();
512        let mut vec = self.buf;
513        unsafe { vec.set_len(self.len) };
514        vec
515    }
516
517    /// Unpack this circular buffer into a string.
518    pub fn into_string(self) -> String {
519        // SAFTEY: The public API only allows valid UTF-8 to be read or written from
520        // the buffer. Thus it is safe to cast to a string.
521        let vec = self.into_vec();
522        unsafe { String::from_utf8_unchecked(vec) }
523    }
524}
525
526impl From<String> for RawCircle {
527    fn from(s: String) -> RawCircle {
528        RawCircle::new(s)
529    }
530}
531
532impl From<RawCircle> for String {
533    fn from(c: RawCircle) -> String {
534        c.into_string()
535    }
536}
537
538impl From<RawCircle> for Vec<u8> {
539    fn from(c: RawCircle) -> Vec<u8> {
540        c.into_vec()
541    }
542}
543
544impl Read for RawCircle {
545    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
546        RawCircle::read(self, buf)
547    }
548}
549
550impl Write for RawCircle {
551    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
552        RawCircle::write(self, buf)
553    }
554
555    fn flush(&mut self) -> io::Result<()> {
556        Ok(())
557    }
558}
559
560
561// CharCircle
562// ---------------------------------------------------------------------------
563
564/// A circular buffer of characters.
565///
566/// The reallocation policy of [`std::collections::VecDeque`] makes it a poor
567/// choice as a circular buffer for modifying strings in place. It reallocates
568/// early (!) and when it does, it places its contents in such a way that
569/// requires shuffling to convert it back into a string in the common case.
570/// This circular buffer provides a better alternative.
571///
572/// This circular buffer will not allocate early and is optimized for the case
573/// when you read exactly the contents that were in the initial buffer. In that
574/// case, the read head is guaranteed to be at index 0 of the underlying
575/// buffer, making the conversion into a string trivial.
576///
577/// Additionally, this circular buffer provides a `char` oriented interface, but
578/// uses UTF-8 internally, allowing it to operate directly on [`String`]s.
579#[derive(Debug, Default, Clone)]
580pub struct CharCircle {
581    raw: RefCell<RawCircle>,
582}
583
584impl CharCircle {
585    /// Construct a new `CharCircle` using a string as the initial buffer.
586    pub fn new(s: String) -> CharCircle {
587        CharCircle { raw: RefCell::new(RawCircle::new(s)) }
588    }
589
590    /// Construct a new, empty `CharCircle`.
591    pub fn empty() -> CharCircle {
592        CharCircle::default()
593    }
594
595    /// The number of UTF-8 bytes in the buffer.
596    pub fn len(&self) -> usize {
597        self.raw.borrow().len()
598    }
599
600    /// The number of characters in the buffer.
601    pub fn n_chars(&self) -> usize {
602        self.raw.borrow().n_chars()
603    }
604
605    /// The number of bytes the buffer can hold before reallocating.
606    ///
607    /// This refers to the length of the backing vector. That vector may have
608    /// additional capacity allocated to it that is not reported by this method.
609    pub fn capacity(&self) -> usize {
610        self.raw.borrow().capacity()
611    }
612
613    /// Read the next character in the buffer.
614    pub fn read_char(&self) -> Option<char> {
615        self.raw.borrow_mut().read_char()
616    }
617
618    /// Read bytes from this circle into a buffer.
619    ///
620    /// This method will only ever read complete UTF-8 characters. It returns the
621    /// number of bytes read; it never returns an error.
622    ///
623    /// This is the implementation of [`std::io::Read`] for `CharCircle`.
624    pub fn read(&self, buf: &mut [u8]) -> io::Result<usize> {
625        self.raw.borrow_mut().read(buf)
626    }
627
628    /// Read bytes from this circle into a buffer.
629    ///
630    /// This method is equivalent to [`RawCircle::read`] except the return value
631    /// is the buffer cast to a `&str`.
632    pub fn read_str<'a>(&self, buf: &'a mut [u8]) -> &'a str {
633        self.raw.borrow_mut().read_str(buf)
634    }
635
636    /// Write a character into the buffer.
637    pub fn write_char(&self, ch: char) {
638        self.raw.borrow_mut().write_char(ch)
639    }
640
641    /// Read bytes from a string into this buffer;
642    ///
643    /// This method will only ever write complete UTF-8 characters. It returns the
644    /// number of bytes written. This method returns an error if the input is not
645    /// valid UTF-8.
646    ///
647    /// This is the implementation of [`std::io::Write`] for `CharCircle`.
648    pub fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
649        self.raw.borrow_mut().write(buf)
650    }
651
652    /// Read bytes from a string into this buffer;
653    ///
654    /// This method is equivalent to [`CharCircle::write`] except that it cannot return
655    /// an error because the input is valid UTF-8.
656    pub fn write_str(&self, buf: &str) -> usize {
657        self.raw.borrow_mut().write_str(buf)
658    }
659
660    /// Unpack this circular buffer into a byte vector.
661    pub fn into_vec(self) -> Vec<u8> {
662        self.raw.into_inner().into_vec()
663    }
664
665    /// Unpack this circular buffer into a string.
666    pub fn into_string(self) -> String {
667        self.raw.into_inner().into_string()
668    }
669
670    /// Read characters from the buffer with an iterator.
671    ///
672    /// The returned iterator will read at most `n` characters and ensures that it
673    /// has been exhausted upon drop.
674    ///
675    /// Calling `next` on the iterator is equivalent to calling `read_char` on
676    /// this buffer.
677    pub fn take_chars(&self, n: usize) -> Chars {
678        Chars::new(&self, n)
679    }
680
681    /// Read the current characters from the buffer with an iterator.
682    ///
683    /// The returned iterator will read at most `n` characters, where `n` is the
684    /// number of characters currently in the buffer, and ensures that it has been
685    /// exhausted upon drop.
686    ///
687    /// Calling `next` on the iterator is equivalent to calling `read_char` on
688    /// this buffer.
689    ///
690    /// This is equivalent to calling [`CharCircle::take_chars`] with the current
691    /// number of characters in the buffer. In particular, interleaving calls to
692    /// `read_char` and `write_char` on the buffer with calls to `next` on the
693    /// iterator may cause the iterator to consume characters that were not in the
694    /// buffer at the time it was created.
695    pub fn take_current_chars(&self) -> Chars {
696        self.take_chars(self.n_chars())
697    }
698}
699
700impl From<String> for CharCircle {
701    fn from(s: String) -> CharCircle {
702        CharCircle::new(s)
703    }
704}
705
706impl From<CharCircle> for String {
707    fn from(c: CharCircle) -> String {
708        c.into_string()
709    }
710}
711
712impl From<CharCircle> for Vec<u8> {
713    fn from(c: CharCircle) -> Vec<u8> {
714        c.into_vec()
715    }
716}
717
718impl Read for CharCircle {
719    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
720        CharCircle::read(self, buf)
721    }
722}
723
724impl Write for CharCircle {
725    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
726        CharCircle::write(self, buf)
727    }
728
729    fn flush(&mut self) -> io::Result<()> {
730        Ok(())
731    }
732}
733
734
735// Chars
736// ---------------------------------------------------------------------------
737
738/// An iterator that consumes characters in a [`CharCircle`].
739///
740/// Calling `next` on this iterator is equivalent to calling
741/// [`CharCircle::read_char`] on the corresponding `CharCircle`.
742///
743/// This iterator ensures that it has been exhausted upon drop.
744///
745/// In the [`StringTransform`] API, this struct is an iterator over the
746/// characters of the string being transformed.
747#[derive(Debug)]
748pub struct Chars<'a> {
749    circle: &'a CharCircle,
750    n_chars: usize,
751}
752
753impl<'a> Chars<'a> {
754    fn new(circle: &'a CharCircle, n_chars: usize) -> Chars<'a> {
755        Chars{ circle, n_chars }
756    }
757}
758
759impl<'a> Iterator for Chars<'a> {
760    type Item = char;
761
762    fn next(&mut self) -> Option<char> {
763        if self.n_chars == 0 {
764            None
765        } else {
766            let ch = self.circle.read_char()?;
767            self.n_chars -= 1;
768            Some(ch)
769        }
770    }
771}
772
773impl<'a> Drop for Chars<'a> {
774    /// Ensure the iterator is exhausted.
775    fn drop(&mut self) {
776        for _ in self { }
777    }
778}
779
780
781// StringTransform
782// ---------------------------------------------------------------------------
783
784/// A factory trait for in-place string transformations.
785///
786/// This trait allows character-iterator adaptors to modify strings in-place.
787/// It is implemented by adaptor factories with a single required method,
788/// `transform_chars`, for constructing the adaptors. In return, this trait
789/// provides a method, `transform`, which can apply the transformation to
790/// both owned strings and string slices, without allocating when possible.
791///
792/// The in-place operation works by treating the underlying string as a
793/// circular buffer. The transform reads characters from the front of the
794/// buffer and writes characters to the back of the buffer. Once the transform
795/// returns `None`, the unread characters are deleted and the circular buffer
796/// is cast back into a string. Note that the transformation cannot always be
797/// applied in place; if the transform ever returns more bytes than it has
798/// read, an allocation is required to grow the buffer.
799///
800/// This trait supports copy-on-write optimizations. Implementors can override
801/// the [`StringTransform::will_modify`] method to short-circuit the transform.
802///
803/// See [`SimpleTransform`] for a variant of this trait that is implemented
804/// directly by iterator adaptors.
805///
806/// The lifetime `'a` refers to the lifetime of the `transform` method.
807pub trait StringTransform<'a> {
808    /// The type after applying this transform to a [`Chars`] iterator.
809    type Iter: Iterator<Item=char>;
810
811    /// Transform the characters of a string.
812    fn transform_chars(&self, chars: Chars<'a>) -> Self::Iter;
813
814    /// A hint to short-circuit string transformations.
815    ///
816    /// If true, the string might be modified by the transform. If false, the
817    /// string will definitely not be modified by the transform.
818    ///
819    /// Implementors may override this function to facilitate copy-on-write
820    /// optimizations. The default implementation always returns true, which
821    /// disables copy-on-write.
822    #[allow(unused_variables)]
823    fn will_modify(&self, val: &str) -> bool {
824        return true
825    }
826
827    /// Transform a string in-place
828    ///
829    /// This method can operate on both `String` and `&str`.
830    ///
831    /// A new string may be allocated if the input is not owned or if the
832    /// transformation needs to buffer more characters than the string has capacity.
833    fn transform<'b, T: Into<Cow<'b, str>>>(&self, s: T) -> Cow<'b, str> {
834        // SAFTEY: The lifetime `'a` refers to the circular buffer, but the borrow
835        // checker isn't smart enough to know that, because `'a` is a type parameter
836        // of the trait. Transumting the iterator to a new lifetime solves the issue.
837        // The safety condition is that we must not let the iterator outlive the
838        // buffer. This means that `old_chars` must be dropped before `circle`.
839        //
840        // Note that it is not safe for the buffer and iterator to be used from
841        // different threads. `CharCircle` is implemented with a `RefCell`, ensuring
842        // that it is not `Send` and that `Chars` is neither `Send` nor `Sync`.
843        let s = s.into();
844        if self.will_modify(&s) {
845            let s = s.into_owned();
846            let circle = CharCircle::new(s);
847            let old_chars = circle.take_current_chars();
848            let old_chars: Chars<'_> = unsafe { mem::transmute(old_chars) };
849            let new_chars = self.transform_chars(old_chars);
850            for ch in new_chars { circle.write_char(ch) };  // `old_chars` is dropped after loop.
851            let t = circle.into_string();  // `circle` is dropped here.
852            Cow::Owned(t)
853        } else {
854            s
855        }
856    }
857}
858
859
860// SimpleTransform
861// ---------------------------------------------------------------------------
862
863/// A simple trait for in-place string transformations.
864///
865/// This trait allows character-iterator adaptors to modify strings in-place.
866/// It is implemented by adaptors with a single required associated function,
867/// `transform_chars`, for constructing the adaptor. In return, this trait
868/// provides an associated function, `transform`, which can apply the
869/// transformation to both owned strings and string slices, without allocating
870/// when possible.
871///
872/// This trait is a simplified version of [`StringTransform`]. See the
873/// documentation of that trait for more details.
874pub trait SimpleTransform<'a>: Iterator<Item=char> + Sized {
875    /// Transform the characters of a string.
876    fn transform_chars(chars: Chars<'a>) -> Self;
877
878    /// A hint to short-circuit string transformations.
879    ///
880    /// If true, the string might be modified by the transform. If false, the
881    /// string will definitely not be modified by the transform.
882    ///
883    /// Implementors may override this function to facilitate copy-on-write
884    /// optimizations. The default implementation always returns true, which
885    /// disables copy-on-write.
886    #[allow(unused_variables)]
887    fn will_modify(val: &str) -> bool {
888        return true
889    }
890
891    /// Transform a string in-place
892    ///
893    /// This function can operate on both `String` and `&str`.
894    ///
895    /// A new string may be allocated if the input is not owned or if the
896    /// transformation needs to buffer more characters than the string has capacity.
897    fn transform<'b, T: Into<Cow<'b, str>>>(s: T) -> Cow<'b, str> {
898        SimpleStringTransform::<Self>::new().transform(s)
899    }
900}
901
902
903// SimpleStringTransform
904// ---------------------------------------------------------------------------
905
906/// Used when you have a [`SimpleTransform`] but need a [`StringTransform`].
907///
908/// ### Example
909///
910/// ```
911/// # use char_circle::*;
912/// // `Identity` is a `SimpleTransform`.
913/// struct Identity<I>(I);
914///
915/// impl<I: Iterator<Item=char>> Iterator for Identity<I> {
916///     type Item = char;
917///     fn next(&mut self) -> Option<char> {
918///         self.0.next()
919///     }
920/// }
921///
922/// impl<'a> SimpleTransform<'a> for Identity<Chars<'a>> {
923///     fn transform_chars(chars: Chars<'a>) -> Self {
924///         Identity(chars)
925///     }
926/// }
927///
928/// // This function takes a `StringTransform`.
929/// fn apply_transform<'a, T: StringTransform<'a>>(t: T, s: String) -> String {
930///     t.transform(s).into_owned()
931/// }
932///
933/// // Use `SimpleStringTransform` to bridge the gap.
934/// let t = SimpleStringTransform::<Identity<Chars>>::new();
935/// let s = apply_transform(t, "Hello World".to_string());
936/// assert_eq!(&s, "Hello World");
937/// ```
938pub struct SimpleStringTransform<T>(pub PhantomData<T>);
939
940impl<'a, T: SimpleTransform<'a>> SimpleStringTransform<T> {
941    /// Create a new `SimpleStringTransform`.
942    pub fn new() -> Self {
943        SimpleStringTransform(PhantomData)
944    }
945}
946
947impl<'a, T: SimpleTransform<'a>> StringTransform<'a> for SimpleStringTransform<T> {
948    type Iter = T;
949
950    fn transform_chars(&self, chars: Chars<'a>) -> Self::Iter {
951        Self::Iter::transform_chars(chars)
952    }
953
954    fn will_modify(&self, val: &str) -> bool {
955        Self::Iter::will_modify(val)
956    }
957}
958
959
960// Unit Tests
961// ---------------------------------------------------------------------------
962
963#[cfg(test)]
964mod tests {
965    use super::*;
966
967    // Identity transform
968    // -------------------------
969
970    /// An identity function as a string transform.
971    struct Identity<I>(I);
972
973    impl<I: Iterator<Item=char>> Iterator for Identity<I> {
974        type Item = char;
975        fn next(&mut self) -> Option<char> {
976            self.0.next()
977        }
978    }
979
980    impl<'a> SimpleTransform<'a> for Identity<Chars<'a>> {
981        fn transform_chars(chars: Chars<'a>) -> Self {
982            Identity(chars)
983        }
984    }
985
986    // Half transform
987    // -------------------------
988
989    /// Deletes every other character.
990    struct Half<I>(I);
991
992    impl<I: Iterator<Item=char>> Iterator for Half<I> {
993        type Item = char;
994        fn next(&mut self) -> Option<char> {
995            self.0.next()?;
996            self.0.next()
997        }
998    }
999
1000    impl<'a> SimpleTransform<'a> for Half<Chars<'a>> {
1001        fn transform_chars(chars: Chars<'a>) -> Self {
1002            Half(chars)
1003        }
1004    }
1005
1006    // Double transform
1007    // -------------------------
1008
1009    /// Duplicates every character.
1010    struct Double<I> {
1011        inner: I,
1012        state: Option<char>,
1013    }
1014
1015    impl<I: Iterator<Item=char>> Iterator for Double<I> {
1016        type Item = char;
1017        fn next(&mut self) -> Option<char> {
1018            match self.state {
1019                Some(_) => self.state.take(),
1020                None => {
1021                    let state = self.inner.next();
1022                    self.state = state;
1023                    state
1024                }
1025            }
1026        }
1027    }
1028
1029    impl<'a> SimpleTransform<'a> for Double<Chars<'a>> {
1030        fn transform_chars(chars: Chars<'a>) -> Self {
1031            Double { inner: chars, state: None }
1032        }
1033    }
1034
1035    // Tests
1036    // -------------------------
1037
1038    #[test]
1039    fn test_transform() {
1040        let s: String = "Hello World".to_string();
1041
1042        // An identity transform does not reallocate.
1043        let ptr = s.as_ptr();
1044        let cap = s.capacity();
1045        let s = Identity::transform(s).into_owned();
1046        assert_eq!(&s, "Hello World");
1047        assert_eq!(s.as_ptr(), ptr);
1048        assert_eq!(s.capacity(), cap);
1049
1050        // A half transform does not reallocate.
1051        let ptr = s.as_ptr();
1052        let cap = s.capacity();
1053        let s = Half::transform(s).into_owned();
1054        assert_eq!(&s, "el ol");
1055        assert_eq!(s.as_ptr(), ptr);
1056        assert_eq!(s.capacity(), cap);
1057
1058        // A first double transform does not reallocate because there is excess capacity.
1059        let ptr = s.as_ptr();
1060        let cap = s.capacity();
1061        let s = Double::transform(s).into_owned();
1062        assert_eq!(&s, "eell  ooll");
1063        assert_eq!(s.as_ptr(), ptr);
1064        assert_eq!(s.capacity(), cap);
1065
1066        // A second double transform _does_ reallocate because it needs additional capacity.
1067        let cap = s.capacity();
1068        let s = Double::transform(s).into_owned();
1069        assert_eq!(&s, "eeeellll    oooollll");
1070        assert_ne!(s.capacity(), cap);
1071    }
1072
1073    #[test]
1074    fn test_chars() {
1075        let s: String = "Hello World".to_string();
1076        let circle = CharCircle::new(s);
1077
1078        // The iterator consumes characters from the buffer.
1079        let mut chars = circle.take_chars(6);
1080        assert_eq!(chars.next(), Some('H'));
1081        assert_eq!(chars.next(), Some('e'));
1082        assert_eq!(chars.next(), Some('l'));
1083
1084        // The iterator consumes all `n` characters upon drop.
1085        std::mem::drop(chars);
1086        let s = circle.into_string();
1087        assert_eq!(s.as_str(), "World");
1088
1089        // The iterator does not consume more than `n` characters.
1090        let s: String = "Hello World".to_string();
1091        let circle = CharCircle::new(s);
1092        let mut chars = circle.take_chars(6);
1093        assert_eq!(chars.next(), Some('H'));
1094        assert_eq!(chars.next(), Some('e'));
1095        assert_eq!(chars.next(), Some('l'));
1096        assert_eq!(chars.next(), Some('l'));
1097        assert_eq!(chars.next(), Some('o'));
1098        assert_eq!(chars.next(), Some(' '));
1099        assert_eq!(chars.next(), None);
1100    }
1101
1102    #[test]
1103    fn test_circle() {
1104        // `read` and `write` work as expected.
1105        // The `read_str` and `write_str` versions hit that code path.
1106        let circle = CharCircle::empty();
1107        circle.write_str("Hello World!");
1108        assert_eq!(circle.len(), 11);
1109        assert_eq!(circle.n_chars(), 11);
1110        let mut buf = [0u8; 6];
1111        let buf_str = circle.read_str(&mut buf);
1112        assert_eq!(buf_str, "Hello ");
1113        assert_eq!(circle.len(), 5);
1114        assert_eq!(circle.n_chars(), 5);
1115        assert_eq!(&circle.into_string(), "World!");
1116
1117        // A more complicated test that alternates between reading and writing.
1118        let circle = CharCircle::empty();
1119        circle.write_char('F');
1120        circle.write_char('o');
1121        circle.write_char('o');
1122        circle.write_char('B');
1123        circle.write_char('a');
1124        circle.write_char('r');
1125        assert_eq!(circle.read_char(), Some('F'));
1126        assert_eq!(circle.read_char(), Some('o'));
1127        assert_eq!(circle.read_char(), Some('o'));
1128        assert_eq!(circle.read_char(), Some('B'));
1129        assert_eq!(circle.read_char(), Some('a'));
1130        assert_eq!(circle.read_char(), Some('r'));
1131        assert_eq!(circle.read_char(), None);
1132        circle.write_char('H');
1133        circle.write_char('e');
1134        circle.write_char('l');
1135        circle.write_char('l');
1136        circle.write_char('o');
1137        circle.write_char(' ');
1138        circle.write_char('W');
1139        circle.write_char('o');
1140        circle.write_char('r');
1141        circle.write_char('l');
1142        circle.write_char('d');
1143        circle.write_char('!');
1144        assert_eq!(circle.read_char(), Some('H'));
1145        assert_eq!(circle.read_char(), Some('e'));
1146        assert_eq!(circle.read_char(), Some('l'));
1147        assert_eq!(circle.read_char(), Some('l'));
1148        assert_eq!(circle.read_char(), Some('o'));
1149        assert_eq!(circle.read_char(), Some(' '));
1150        circle.write_char('F');
1151        circle.write_char('o');
1152        circle.write_char('o');
1153        circle.write_char('F');
1154        circle.write_char('o');
1155        circle.write_char('o');
1156        assert_eq!(circle.read_char(), Some('W'));
1157        assert_eq!(circle.read_char(), Some('o'));
1158        assert_eq!(circle.read_char(), Some('r'));
1159        assert_eq!(circle.read_char(), Some('l'));
1160        assert_eq!(circle.read_char(), Some('d'));
1161        assert_eq!(circle.read_char(), Some('!'));
1162        assert_eq!(circle.read_char(), Some('F'));
1163        assert_eq!(circle.read_char(), Some('o'));
1164        assert_eq!(circle.read_char(), Some('o'));
1165        circle.write_char('B');
1166        circle.write_char('a');
1167        circle.write_char('r');
1168        assert_eq!(&circle.into_string(), "FooBar");
1169    }
1170}