ctrl_z/lib.rs
1//! A composable reader to treat `0x1A` as an end-of-file marker.
2//!
3//! Historically, `0x1A` (commonly referred to as `CTRL-Z`, `^Z`, or a "substitute character") was used
4//! in old systems to explicitly mark the end of a file. While modern systems no longer require this
5//! practice, some legacy files still contain this byte to mark the end of a file. This library
6//! provides a reader to treat `0x1A` as the end of a file, rather than reading it as a regular byte.
7//!
8//! # Usage
9//! This library provides a reader in the form of a `struct` named `ReadToCtrlZ`. As is common
10//! practice, this reader is composable with other types implementing the
11//! [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html) or
12//! [`BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html) traits. The reader checks the
13//! returned bytes for the presence of the EOF marker `0x1A` and stops reading when it is encountered.
14//!
15//! # Example
16//! For example, the reader defined below only reads until the `0x1A` byte, at which point it stops
17//! reading.
18//!
19//! ```
20//! use ctrl_z::ReadToCtrlZ;
21//! use std::io::Read;
22//! #
//! # // Redefines `[u8]::as_slice()` for backwards compatibility.
24//! # trait AsSlice {
25//! # fn as_slice(&self) -> &[u8];
26//! # }
27//! #
28//! # impl AsSlice for [u8] {
29//! # fn as_slice(&self) -> &[u8] {
30//! # self
31//! # }
32//! # }
33//!
34//! let mut reader = ReadToCtrlZ::new(b"foo\x1a".as_slice());
35//! let mut output = String::new();
36//!
37//! // Reading omits the final `0x1A` byte.
38//! assert!(reader.read_to_string(&mut output).is_ok());
39//! assert_eq!(output, "foo");
40//! ```
41
42#![allow(deprecated)]
43
44#[cfg(test)]
45#[macro_use]
46extern crate claim;
47
48use std::io::BufRead;
49use std::io::Error;
50use std::io::ErrorKind;
51use std::io::Read;
52use std::io::Result;
53use std::slice;
54
/// A composable reader that reads until a `0x1A` byte (commonly known as `CTRL-Z` or the
/// "substitute character") is encountered.
///
/// This `struct` is a wrapper around another type that implements [`Read`] or [`BufRead`]. Calls
/// to the methods of those traits are forwarded to the interior type until a `0x1A` byte is
/// read, at which point reading ceases. The `0x1A` byte itself is never handed to the caller.
///
/// # Example
/// Here is an example of a `ReadToCtrlZ` wrapped around a `&[u8]`, which implements [`Read`].
///
/// ```
/// use ctrl_z::ReadToCtrlZ;
/// use std::io::Read;
/// #
/// # // Redefines `[u8]::as_slice()` for backwards compatibility.
/// # trait AsSlice {
/// #     fn as_slice(&self) -> &[u8];
/// # }
/// #
/// # impl AsSlice for [u8] {
/// #     fn as_slice(&self) -> &[u8] {
/// #         self
/// #     }
/// # }
///
/// let mut reader = ReadToCtrlZ::new(b"foo\x1a".as_slice());
/// let mut output = String::new();
///
/// // Reading omits the final `0x1A` byte.
/// assert!(reader.read_to_string(&mut output).is_ok());
/// assert_eq!(output, "foo");
/// ```
#[derive(Debug)]
pub struct ReadToCtrlZ<R> {
    /// The internal reader being read.
    inner: R,
    /// Whether or not the EOF `0x1A` byte has been reached.
    terminated: bool,
}
93
94impl<R> ReadToCtrlZ<R> {
95 /// Creates a new `ReadToCtrlZ`, wrapping the provided reader.
96 ///
97 /// # Example
98 /// Here is an example of creating a new `ReadToCtrlZ` wrapping around a `&[u8]`.
99 ///
100 /// ```
101 /// use ctrl_z::ReadToCtrlZ;
102 /// #
103 /// # // Redefines `[u8]:as_slice()` for backwards compatibility.
104 /// # trait AsSlice {
105 /// # fn as_slice(&self) -> &[u8];
106 /// # }
107 /// #
108 /// # impl AsSlice for [u8] {
109 /// # fn as_slice(&self) -> &[u8] {
110 /// # self
111 /// # }
112 /// # }
113 ///
114 /// let reader = ReadToCtrlZ::new(b"foo\x1a".as_slice());
115 /// ```
116 pub fn new(inner: R) -> Self {
117 ReadToCtrlZ {
118 inner: inner,
119 terminated: false,
120 }
121 }
122}
123
124impl<R> Read for ReadToCtrlZ<R>
125where
126 R: Read,
127{
128 fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
129 if self.terminated {
130 return Ok(0);
131 }
132
133 let n = try!(self.inner.read(buf));
134 for i in 0..n {
135 if *try!(buf.get(i).ok_or_else(|| {
136 Error::new(ErrorKind::Other, "buffer smaller than amount of bytes read")
137 })) == b'\x1a'
138 {
139 self.terminated = true;
140 return Ok(i);
141 }
142 }
143 Ok(n)
144 }
145}
146
147impl<R> BufRead for ReadToCtrlZ<R>
148where
149 R: BufRead,
150{
151 fn fill_buf(&mut self) -> Result<&[u8]> {
152 if self.terminated {
153 return Ok(&[]);
154 }
155
156 let buf = try!(self.inner.fill_buf());
157 for i in 0..buf.len() {
158 // SAFETY: `i` is guaranteed to be a valid index into `buf`.
159 if *unsafe { buf.get_unchecked(i) } == b'\x1a' {
160 if i == 0 {
161 self.terminated = true;
162 }
163 // SAFETY: The range `..i` is guaranteed to be a valid index into `buf`.
164 return Ok(unsafe { slice::from_raw_parts(buf.as_ptr(), i) });
165 }
166 }
167 Ok(buf)
168 }
169
170 fn consume(&mut self, amount: usize) {
171 self.inner.consume(amount);
172 }
173}
174
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::BufRead;
    use std::io::ErrorKind;
    use std::io::Read;
    use std::io::Result;

    /// Reads everything a `ReadToCtrlZ` over `input` yields into a `String`, returning both the
    /// result of the read and the text that was collected.
    fn drain(input: &[u8]) -> (Result<usize>, String) {
        let mut text = String::new();
        let outcome = ReadToCtrlZ::new(input).read_to_string(&mut text);
        (outcome, text)
    }

    #[test]
    fn read_exclude_ctrl_z() {
        // A trailing marker is dropped from the output.
        let (outcome, text) = drain(b"foo\x1a");

        assert_ok_eq!(outcome, 3);
        assert_eq!(text, "foo");
    }

    #[test]
    fn read_no_ctrl_z() {
        // Input without a marker is passed through untouched.
        let (outcome, text) = drain(b"foo");

        assert_ok_eq!(outcome, 3);
        assert_eq!(text, "foo");
    }

    #[test]
    fn read_stop_at_ctrl_z() {
        // Everything from the marker onwards is discarded.
        let (outcome, text) = drain(b"foo\x1abar");

        assert_ok_eq!(outcome, 3);
        assert_eq!(text, "foo");
    }

    #[test]
    fn read_after_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1abar" as &[u8]);
        let mut text = String::new();

        assert_ok_eq!(reader.read_to_string(&mut text), 3);
        assert_eq!(text, "foo");

        // This indicates the reader has reached EOF.
        assert_ok_eq!(reader.read_to_string(&mut text), 0);
    }

    /// A misbehaving reader that always claims to have read one byte more than the buffer holds.
    struct OverreportingReader;

    impl Read for OverreportingReader {
        fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
            Ok(buf.len() + 1)
        }
    }

    #[test]
    fn read_with_bad_inner() {
        let mut reader = ReadToCtrlZ::new(OverreportingReader);
        let error = assert_err!(reader.read(&mut []));

        assert_eq!(error.kind(), ErrorKind::Other);
        assert_eq!(
            error.to_string(),
            "buffer smaller than amount of bytes read"
        )
    }

    #[test]
    fn buf_read_exclude_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1a" as &[u8]);

        assert_ok_eq!(reader.fill_buf(), b"foo");
    }

    #[test]
    fn buf_read_no_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo" as &[u8]);

        assert_ok_eq!(reader.fill_buf(), b"foo");
    }

    #[test]
    fn buf_read_stop_at_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1abar" as &[u8]);

        assert_ok_eq!(reader.fill_buf(), b"foo");
    }

    #[test]
    fn buf_read_after_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"foo\x1abar" as &[u8]);

        // The bytes in front of the marker are offered first and then consumed.
        assert_ok_eq!(reader.fill_buf(), b"foo");
        reader.consume(3);

        // The reader should return nothing else, since the EOF `0x1A` was reached.
        assert_ok_eq!(reader.fill_buf(), b"");
    }

    #[test]
    fn buf_read_after_starting_ctrl_z() {
        let mut reader = ReadToCtrlZ::new(b"\x1abar" as &[u8]);

        // Should stop before "bar".
        assert_ok_eq!(reader.fill_buf(), b"");

        // The reader should return nothing else, since the EOF `0x1A` was reached.
        assert_ok_eq!(reader.fill_buf(), b"");
    }
}