ggstd/bytes/
buffer.rs

1// Copyright 2023 The rust-ggstd authors. All rights reserved.
2// Copyright 2009 The Go Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file.
5
6//! Simple byte buffer for marshaling data.
7
8use crate::compat;
9use std::io::Write;
10
/// Initial capacity of a buffer created by `Buffer::new`, and the minimum size
/// the consumed prefix must exceed before `guarantee_space` compacts the data.
const SMALL_BUFFER_SIZE: usize = 64;

/// A Buffer is a variable-sized buffer of bytes with read and write methods.
///
/// Use `Buffer::new()` or `Buffer::default()` to get an empty buffer that is ready to use.
pub struct Buffer {
    // ggstd TODO: use std::io::Cursor instead?

    // rust impl: it is much simpler than Go since we are using Vec,
    // which has support for growing.
    // buf[off..buf.len()] - unread data in the buffer
    // buf[..off]          - data that has already been read
    // off is expected to never exceed buf.len().
    buf: Vec<u8>,
    off: usize,
    // 	lastRead readOp // last read operation, so that Unread* can work correctly.
}

// // The readOp constants describe the last action performed on
// // the buffer, so that UnreadRune and UnreadByte can check for
// // invalid usage. opReadRuneX constants are chosen such that
// // converted to int they correspond to the rune size that was read.
// type readOp int8

// // Don't use iota for these, as the values need to correspond with the
// // names and comments, which is easier to see when being explicit.
// const (
// 	opRead      readOp = -1 // Any other read operation.
// 	opInvalid   readOp = 0  // Non-read operation.
// 	opReadRune1 readOp = 1  // Read rune of size 1.
// 	opReadRune2 readOp = 2  // Read rune of size 2.
// 	opReadRune3 readOp = 3  // Read rune of size 3.
// 	opReadRune4 readOp = 4  // Read rune of size 4.
// )

// // ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
// var ErrTooLarge = errors.New("bytes::Buffer::new(): too large")
// var errNegativeRead = errors.New("bytes::Buffer::new(): reader returned negative count from Read")

// const maxInt = int(^uint(0) >> 1)

impl Buffer {
    /// new creates an empty buffer with `SMALL_BUFFER_SIZE` bytes of
    /// pre-allocated storage.
    pub fn new() -> Self {
        Self {
            buf: Vec::with_capacity(SMALL_BUFFER_SIZE),
            off: 0,
        }
    }

    /// bytes returns a slice of length `self.len()` holding the unread portion of the buffer.
    ///
    /// The slice borrows the buffer, so it cannot outlive the next buffer
    /// modification (a read, write, reset or truncate).
    pub fn bytes(&self) -> &[u8] {
        &self.buf[self.off..]
    }

    /// string returns the contents of the unread portion of the buffer
    /// as a string.
    ///
    /// Byte sequences that are not valid UTF-8 are replaced with U+FFFD,
    /// as done by `String::from_utf8_lossy`.
    // To build strings more efficiently, see the strings.Builder type.
    pub fn string(&self) -> String {
        // into_owned reuses the allocation when the lossy conversion already
        // produced an owned String, instead of copying it a second time.
        String::from_utf8_lossy(self.bytes()).into_owned()
    }

    /// empty reports whether the unread portion of the buffer is empty.
    fn empty(&self) -> bool {
        self.buf.len() <= self.off
    }

    /// len returns the number of bytes of the unread portion of the buffer;
    /// `b.len() == b.bytes().len()`.
    pub fn len(&self) -> usize {
        self.buf.len() - self.off
    }

    /// is_empty reports whether there are no unread bytes in the buffer.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// cap returns the capacity of the buffer's underlying byte vector, that is, the
    /// total space allocated for the buffer's data.
    pub fn cap(&self) -> usize {
        self.buf.capacity()
    }

    /// truncate discards all but the first n unread bytes from the buffer
    /// but continues to use the same allocated storage.
    ///
    /// `truncate(0)` is the same as `reset()`.
    ///
    /// # Panics
    ///
    /// Panics if n is greater than the number of unread bytes in the buffer.
    pub fn truncate(&mut self, n: usize) {
        if n == 0 {
            self.reset();
            return;
        }
        // self.lastRead = opInvalid
        if n > self.len() {
            panic!("bytes::Buffer::new(): truncation out of range");
        }
        // Keep the already-read prefix plus the first n unread bytes.
        self.buf.truncate(n + self.off);
    }

    /// reset resets the buffer to be empty,
    /// but it retains the underlying storage for use by future writes.
    /// reset is the same as `truncate(0)`.
    pub fn reset(&mut self) {
        self.off = 0;
        self.buf.clear();
        // b.lastRead = opInvalid
    }

    /// guarantee_space guarantees that n more bytes can be appended to `buf`
    /// without another allocation.
    fn guarantee_space(&mut self, n: usize) {
        // rust impl:
        //   buf can grow indefinitely if we allow it.
        //   We need to avoid that by moving off to the beginning of buf.
        //   We move off in two cases:
        //     - buf has no unread data
        //     - off is further than half of the buf capacity
        if self.empty() {
            self.reset();
        } else if self.off > SMALL_BUFFER_SIZE && self.off > self.buf.capacity() / 2 {
            // Drop the consumed prefix; the unread bytes slide down to index 0
            // and the capacity is left untouched.
            self.buf.drain(..self.off);
            self.off = 0;
        }

        // Free room must be measured behind buf.len(): a consumed prefix that
        // was not compacted above cannot hold appended bytes. Vec::reserve
        // does nothing when capacity() - len() is already at least n.
        self.buf.reserve(n);
    }

    /// grow grows the buffer's capacity, if necessary, to guarantee space for
    /// another n bytes. After `grow(n)`, at least n bytes can be written to the
    /// buffer without another allocation.
    pub fn grow(&mut self, n: usize) {
        self.guarantee_space(n);
    }
}
157
158impl Default for Buffer {
159    fn default() -> Self {
160        Self::new()
161    }
162}
163
164impl std::io::Read for Buffer {
165    /// Read reads the next p.len() bytes from the buffer or until the buffer
166    /// is drained. The return value n is the number of bytes read.
167    fn read(&mut self, p: &mut [u8]) -> std::io::Result<usize> {
168        // b.lastRead = opInvalid
169        if self.empty() {
170            // Buffer is empty, reset to recover space.
171            self.reset();
172            if p.is_empty() {
173                return Ok(0);
174            }
175            return Ok(0);
176        }
177        let n = compat::copy(p, &self.buf[self.off..]);
178        self.off += n;
179        if n > 0 {
180            // self.lastRead = opRead
181        }
182        Ok(n)
183    }
184}
185
186impl std::io::Write for Buffer {
187    /// write appends the contents of p to the buffer, growing the buffer as
188    /// needed. The return value n is the length of p.
189    fn write(&mut self, p: &[u8]) -> std::io::Result<usize> {
190        self.guarantee_space(p.len());
191        self.buf.extend_from_slice(p);
192        Ok(p.len())
193    }
194
195    fn flush(&mut self) -> std::io::Result<()> {
196        Ok(())
197    }
198}
199
200impl Buffer {
201    /// write_string appends the contents of s to the buffer, growing the buffer as
202    /// needed. The return value n is the length of s.
203    pub fn write_string(&mut self, s: &str) -> std::io::Result<usize> {
204        // b.lastRead = opInvalid
205        self.write(s.as_bytes())
206    }
207
208    // // MinRead is the minimum slice size passed to a Read call by
209    // // Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond
210    // // what is required to hold the contents of r, ReadFrom will not grow the
211    // // underlying buffer.
212    // const MinRead = 512
213
214    // // ReadFrom reads data from r until EOF and appends it to the buffer, growing
215    // // the buffer as needed. The return value n is the number of bytes read. Any
216    // // error except io.EOF encountered during the read is also returned. If the
217    // // buffer becomes too large, ReadFrom will panic with ErrTooLarge.
218    // fn ReadFrom(&self, r io.Reader) (n int64, err error) {
219    // 	b.lastRead = opInvalid
220    // 	for {
221    // 		i := b.guarantee_space(MinRead)
222    // 		self.buf = self.buf[..i]
223    // 		m, e := r.Read(self.buf[i:cap(self.buf)])
224    // 		if m < 0 {
225    // 			panic(errNegativeRead)
226    // 		}
227
228    // 		self.buf = self.buf[..i+m]
229    // 		n += int64(m)
230    // 		if e == io.EOF {
231    // 			return n, nil // e is EOF, so return nil explicitly
232    // 		}
233    // 		if e != nil {
234    // 			return n, e
235    // 		}
236    // 	}
237    // }
238
239    // // WriteTo writes data to w until the buffer is drained or an error occurs.
240    // // The return value n is the number of bytes written; it always fits into an
241    // // int, but it is int64 to match the io.WriterTo interface. Any error
242    // // encountered during the write is also returned.
243    // fn WriteTo(&self, w ggio::Writer) (n int64, err error) {
244    // 	b.lastRead = opInvalid
245    // 	if nBytes := b.Len(); nBytes > 0 {
246    // 		m, e := w.write(self.buf[self.off..])
247    // 		if m > nBytes {
248    // 			panic("bytes::Buffer::new().WriteTo: invalid Write count")
249    // 		}
250    // 		self.off += m
251    // 		n = int64(m)
252    // 		if e != nil {
253    // 			return n, e
254    // 		}
255    // 		// all bytes should have been written, by definition of
256    // 		// Write method in ggio::Writer
257    // 		if m != nBytes {
258    // 			return n, io.ErrShortWrite
259    // 		}
260    // 	}
261    // 	// Buffer is now empty; reset.
262    // 	b.reset()
263    // 	return n, nil
264    // }
265
266    /// write_byte appends the byte c to the buffer, growing the buffer as needed.
267    pub fn write_byte(&mut self, c: u8) -> std::io::Result<()> {
268        // b.lastRead = opInvalid
269        self.guarantee_space(1);
270        self.buf.push(c);
271        Ok(())
272    }
273
274    // // WriteRune appends the UTF-8 encoding of Unicode code point r to the
275    // // buffer, returning its length and an error, which is always nil but is
276    // // included to match bufio::Writer's WriteRune. The buffer is grown as needed;
277    // // if it becomes too large, WriteRune will panic with ErrTooLarge.
278    // fn WriteRune(&self, r rune) (n: usize, err error) {
279    // 	// Compare as uint32 to correctly handle negative runes.
280    // 	if uint32(r) < utf8::RUNE_SELF {
281    // 		b.write_byte(byte(r))
282    // 		return 1, nil
283    // 	}
284    // 	b.lastRead = opInvalid
285    // 	m, ok := b.tryGrowByReslice(utf8.UTFMAX)
286    // 	if !ok {
287    // 		m = b.guarantee_space(utf8.UTFMAX)
288    // 	}
289    // 	self.buf = utf8.AppendRune(self.buf[..m], r)
290    // 	return self.buf.len() - m, nil
291    // }
292
293    // // Next returns a slice containing the next n bytes from the buffer,
294    // // advancing the buffer as if the bytes had been returned by read.
295    // // If there are fewer than n bytes in the buffer, Next returns the entire buffer.
296    // // The slice is only valid until the next call to a read or write method.
297    // pub fn Next(&self, n: usize) -> &[u8] {
298    // 	b.lastRead = opInvalid
299    // 	m := b.Len()
300    // 	if n > m {
301    // 		n = m
302    // 	}
303    // 	data := self.buf[self.off : self.off+n]
304    // 	self.off += n
305    // 	if n > 0 {
306    // 		b.lastRead = opRead
307    // 	}
308    // 	return data
309    // }
310
311    /// read_byte reads and returns the next byte from the buffer.
312    /// If no byte is available, it returns None.
313    pub fn read_byte(&mut self) -> Option<u8> {
314        if self.empty() {
315            // Buffer is empty, reset to recover space.
316            self.reset();
317            return None;
318        }
319        let c = self.buf[self.off];
320        self.off += 1;
321        // 	self.lastRead = opRead
322        Some(c)
323    }
324
325    // // ReadRune reads and returns the next UTF-8-encoded
326    // // Unicode code point from the buffer.
327    // // If no bytes are available, the error returned is io.EOF.
328    // // If the bytes are an erroneous UTF-8 encoding, it
329    // // consumes one byte and returns U+FFFD, 1.
330    // pub fn ReadRune(&self) (r rune, size int, err error) {
331    // 	if b.empty() {
332    // 		// Buffer is empty, reset to recover space.
333    // 		b.reset()
334    // 		return 0, 0, io.EOF
335    // 	}
336    // 	c := self.buf[self.off]
337    // 	if c < utf8::RUNE_SELF {
338    // 		self.off++
339    // 		b.lastRead = opReadRune1
340    // 		return rune(c), 1, nil
341    // 	}
342    // 	r, n := utf8.decode_rune(self.buf[self.off..])
343    // 	self.off += n
344    // 	b.lastRead = readOp(n)
345    // 	return r, n, nil
346    // }
347
348    // // UnreadRune unreads the last rune returned by ReadRune.
349    // // If the most recent read or write operation on the buffer was
350    // // not a successful ReadRune, UnreadRune returns an error.  (In this regard
351    // // it is stricter than UnreadByte, which will unread the last byte
352    // // from any read operation.)
353    // pub fn UnreadRune(&self) error {
354    // 	if b.lastRead <= opInvalid {
355    // 		return errors.New("bytes::Buffer::new(): UnreadRune: previous operation was not a successful ReadRune")
356    // 	}
357    // 	if self.off >= int(b.lastRead) {
358    // 		self.off -= int(b.lastRead)
359    // 	}
360    // 	b.lastRead = opInvalid
361    // 	return nil
362    // }
363
364    // var errUnreadByte = errors.New("bytes::Buffer::new(): UnreadByte: previous operation was not a successful read")
365
366    // // UnreadByte unreads the last byte returned by the most recent successful
367    // // read operation that read at least one byte. If a write has happened since
368    // // the last read, if the last read returned an error, or if the read read zero
369    // // bytes, UnreadByte returns an error.
370    // fn UnreadByte(&self) error {
371    // 	if b.lastRead == opInvalid {
372    // 		return errUnreadByte
373    // 	}
374    // 	b.lastRead = opInvalid
375    // 	if self.off > 0 {
376    // 		self.off--
377    // 	}
378    // 	return nil
379    // }
380
381    // // ReadBytes reads until the first occurrence of delim in the input,
382    // // returning a slice containing the data up to and including the delimiter.
383    // // If ReadBytes encounters an error before finding a delimiter,
384    // // it returns the data read before the error and the error itself (often io.EOF).
385    // // ReadBytes returns err != nil if and only if the returned data does not end in
386    // // delim.
387    // fn ReadBytes(&self, delim byte) (line [u8], err error) {
388    // 	slice, err := b.readSlice(delim)
389    // 	// return a copy of slice. The buffer's backing array may
390    // 	// be overwritten by later calls.
391    // 	line = append(line, slice...)
392    // 	return line, err
393    // }
394
395    // // readSlice is like ReadBytes but returns a reference to internal buffer data.
396    // fn readSlice(&self, delim byte) (line [u8], err error) {
397    // 	i := index_byte(self.buf[self.off..], delim)
398    // 	end := self.off + i + 1
399    // 	if i < 0 {
400    // 		end = self.buf.len()
401    // 		err = io.EOF
402    // 	}
403    // 	line = self.buf[self.off:end]
404    // 	self.off = end
405    // 	b.lastRead = opRead
406    // 	return line, err
407    // }
408
409    // // ReadString reads until the first occurrence of delim in the input,
410    // // returning a string containing the data up to and including the delimiter.
411    // // If ReadString encounters an error before finding a delimiter,
412    // // it returns the data read before the error and the error itself (often io.EOF).
413    // // ReadString returns err != nil if and only if the returned data does not end
414    // // in delim.
415    // pub fn ReadString(&self, delim byte) (line string, err error) {
416    // 	slice, err := b.readSlice(delim)
417    // 	return string(slice), err
418    // }
419}
420
421/// new_buffer creates and initializes a new Buffer using buf as its
422/// initial contents. The new Buffer takes ownership of buf, and the
423/// caller should not use buf after this call. new_buffer is intended to
424/// prepare a Buffer to read existing data. It can also be used to set
425/// the initial size of the internal buffer for writing. To do that,
426/// buf should have the desired capacity but a length of zero.
427///
428/// Buffer::new() also can be used to initialize a Buffer.
429pub fn new_buffer(buf: Vec<u8>) -> Buffer {
430    Buffer { buf, off: 0 }
431}
432
433/// new_buffer_string creates and initializes a new Buffer using string s as its
434/// initial contents. It is intended to prepare a buffer to read an existing
435/// string.
436///
437/// Buffer::new() also can be used to initialize a Buffer.
438pub fn new_buffer_string(s: &str) -> Buffer {
439    Buffer {
440        buf: s.as_bytes().to_vec(),
441        off: 0,
442    }
443}
444
445impl std::io::BufRead for Buffer {
446    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
447        Ok(self.bytes())
448    }
449
450    fn consume(&mut self, amt: usize) {
451        self.off += amt.min(self.len());
452    }
453}