ctrl_z/
lib.rs

1//! A composable reader to treat `0x1A` as an end-of-file marker.
2//!
3//! Historically, `0x1A` (commonly referred to as `CTRL-Z`, `^Z`, or a "substitute character") was used
4//! in old systems to explicitly mark the end of a file. While modern systems no longer require this
5//! practice, some legacy files still contain this byte to mark the end of a file. This library
6//! provides a reader to treat `0x1A` as the end of a file, rather than reading it as a regular byte.
7//!
8//! # Usage
9//! This library provides a reader in the form of a `struct` named `ReadToCtrlZ`. As is common
10//! practice, this reader is composable with other types implementing the
11//! [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html) or
12//! [`BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html) traits. The reader checks the
13//! returned bytes for the presence of the EOF marker `0x1A` and stops reading when it is encountered.
14//!
15//! # Example
16//! For example, the reader defined below only reads until the `0x1A` byte, at which point it stops
17//! reading.
18//!
19//! ```
20//! use ctrl_z::ReadToCtrlZ;
21//! use std::io::Read;
22//! #
//! # // Redefines `[u8]::as_slice()` for backwards compatibility.
24//! # trait AsSlice {
25//! #     fn as_slice(&self) -> &[u8];
26//! # }
27//! #
28//! # impl AsSlice for [u8] {
29//! #     fn as_slice(&self) -> &[u8] {
30//! #         self
31//! #     }
32//! # }
33//!
34//! let mut reader = ReadToCtrlZ::new(b"foo\x1a".as_slice());
35//! let mut output = String::new();
36//!
37//! // Reading omits the final `0x1A` byte.
38//! assert!(reader.read_to_string(&mut output).is_ok());
39//! assert_eq!(output, "foo");
40//! ```
41
42#![allow(deprecated)]
43
44#[cfg(test)]
45#[macro_use]
46extern crate claim;
47
48use std::io::BufRead;
49use std::io::Error;
50use std::io::ErrorKind;
51use std::io::Read;
52use std::io::Result;
53use std::slice;
54
/// A composable reader to read until a `0x1A` byte (commonly known as `CTRL-Z` or the "substitute
/// character") is encountered.
///
/// This `struct` is a wrapper around another type that implements [`Read`] or [`BufRead`]. Calls
/// to the methods of those traits will be forwarded to the interior type until a `0x1A` byte is
/// read, at which point reading will cease.
///
/// # Example
/// Here is an example of a `ReadToCtrlZ` wrapped around a `&[u8]`, which implements [`Read`].
///
/// ```
/// use ctrl_z::ReadToCtrlZ;
/// use std::io::Read;
/// #
/// # // Redefines `[u8]::as_slice()` for backwards compatibility.
/// # trait AsSlice {
/// #     fn as_slice(&self) -> &[u8];
/// # }
/// #
/// # impl AsSlice for [u8] {
/// #     fn as_slice(&self) -> &[u8] {
/// #         self
/// #     }
/// # }
///
/// let mut reader = ReadToCtrlZ::new(b"foo\x1a".as_slice());
/// let mut output = String::new();
///
/// // Reading omits the final `0x1A` byte.
/// assert!(reader.read_to_string(&mut output).is_ok());
/// assert_eq!(output, "foo");
/// ```
// `Debug` is derived so the wrapper can be inspected and embedded in other `Debug` types; the
// derived implementation is only available when the wrapped reader is itself `Debug`.
#[derive(Debug)]
pub struct ReadToCtrlZ<R> {
    /// The internal reader being read.
    inner: R,
    /// Whether or not the EOF `0x1A` byte has been reached.
    ///
    /// Once this is `true`, every subsequent read reports end-of-file without touching `inner`.
    terminated: bool,
}
93
94impl<R> ReadToCtrlZ<R> {
95    /// Creates a new `ReadToCtrlZ`, wrapping the provided reader.
96    ///
97    /// # Example
98    /// Here is an example of creating a new `ReadToCtrlZ` wrapping around a `&[u8]`.
99    ///
100    /// ```
101    /// use ctrl_z::ReadToCtrlZ;
102    /// #
103    /// # // Redefines `[u8]:as_slice()` for backwards compatibility.
104    /// # trait AsSlice {
105    /// #     fn as_slice(&self) -> &[u8];
106    /// # }
107    /// #
108    /// # impl AsSlice for [u8] {
109    /// #     fn as_slice(&self) -> &[u8] {
110    /// #         self
111    /// #     }
112    /// # }
113    ///
114    /// let reader = ReadToCtrlZ::new(b"foo\x1a".as_slice());
115    /// ```
116    pub fn new(inner: R) -> Self {
117        ReadToCtrlZ {
118            inner: inner,
119            terminated: false,
120        }
121    }
122}
123
124impl<R> Read for ReadToCtrlZ<R>
125where
126    R: Read,
127{
128    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
129        if self.terminated {
130            return Ok(0);
131        }
132
133        let n = try!(self.inner.read(buf));
134        for i in 0..n {
135            if *try!(buf.get(i).ok_or_else(|| {
136                Error::new(ErrorKind::Other, "buffer smaller than amount of bytes read")
137            })) == b'\x1a'
138            {
139                self.terminated = true;
140                return Ok(i);
141            }
142        }
143        Ok(n)
144    }
145}
146
147impl<R> BufRead for ReadToCtrlZ<R>
148where
149    R: BufRead,
150{
151    fn fill_buf(&mut self) -> Result<&[u8]> {
152        if self.terminated {
153            return Ok(&[]);
154        }
155
156        let buf = try!(self.inner.fill_buf());
157        for i in 0..buf.len() {
158            // SAFETY: `i` is guaranteed to be a valid index into `buf`.
159            if *unsafe { buf.get_unchecked(i) } == b'\x1a' {
160                if i == 0 {
161                    self.terminated = true;
162                }
163                // SAFETY: The range `..i` is guaranteed to be a valid index into `buf`.
164                return Ok(unsafe { slice::from_raw_parts(buf.as_ptr(), i) });
165            }
166        }
167        Ok(buf)
168    }
169
170    fn consume(&mut self, amount: usize) {
171        self.inner.consume(amount);
172    }
173}
174
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{BufRead, ErrorKind, Read, Result};

    // ---- `Read` implementation ----

    /// A trailing marker is dropped from the output.
    #[test]
    fn read_exclude_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1a" as &[u8]);
        let mut text = String::new();

        assert_ok_eq!(reader.read_to_string(&mut text), 3);
        assert_eq!(text, "foo");
    }

    /// Input without a marker is passed through in full.
    #[test]
    fn read_no_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo" as &[u8]);
        let mut text = String::new();

        assert_ok_eq!(reader.read_to_string(&mut text), 3);
        assert_eq!(text, "foo");
    }

    /// Bytes after the marker are never reported.
    #[test]
    fn read_stop_at_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1abar" as &[u8]);
        let mut text = String::new();

        assert_ok_eq!(reader.read_to_string(&mut text), 3);
        assert_eq!(text, "foo");
    }

    /// Reading again after the marker keeps reporting end-of-file.
    #[test]
    fn read_after_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1abar" as &[u8]);
        let mut text = String::new();

        assert_ok_eq!(reader.read_to_string(&mut text), 3);
        assert_eq!(text, "foo");

        // This indicates the reader has reached EOF.
        assert_ok_eq!(reader.read_to_string(&mut text), 0);
    }

    /// A misbehaving reader that always claims one byte more than the buffer can hold.
    struct BadReader;

    impl Read for BadReader {
        fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
            Ok(buf.len() + 1)
        }
    }

    /// An impossible byte count from the inner reader surfaces as an error.
    #[test]
    fn read_with_bad_inner() {
        let mut reader = ReadToCtrlZ::new(BadReader);
        let mut empty: [u8; 0] = [];
        let error = assert_err!(reader.read(&mut empty));

        assert_eq!(error.kind(), ErrorKind::Other);
        assert_eq!(
            error.to_string(),
            "buffer smaller than amount of bytes read"
        )
    }

    // ---- `BufRead` implementation ----

    /// A trailing marker is cut from the buffered view.
    #[test]
    fn buf_read_exclude_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1a" as &[u8]);

        assert_ok_eq!(reader.fill_buf(), b"foo");
    }

    /// A buffer without a marker is returned unchanged.
    #[test]
    fn buf_read_no_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo" as &[u8]);

        assert_ok_eq!(reader.fill_buf(), b"foo");
    }

    /// Only the bytes before the marker are exposed.
    #[test]
    fn buf_read_stop_at_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1abar" as &[u8]);

        assert_ok_eq!(reader.fill_buf(), b"foo");
    }

    /// After consuming the prefix, nothing beyond the marker becomes visible.
    #[test]
    fn buf_read_after_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1abar" as &[u8]);

        assert_ok_eq!(reader.fill_buf(), b"foo");
        reader.consume(3);

        // The reader should return nothing else, since the EOF `0x1A` was reached.
        assert_ok_eq!(reader.fill_buf(), b"");
    }

    /// A marker at the very start yields an empty buffer, repeatedly.
    #[test]
    fn buf_read_after_starting_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"\x1abar" as &[u8]);

        // Should stop before "bar".
        assert_ok_eq!(reader.fill_buf(), b"");

        // The reader should return nothing else, since the EOF `0x1A` was reached.
        assert_ok_eq!(reader.fill_buf(), b"");
    }
}