easyio 0.2.0

Rust library for convenience IO functions
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
use crate::read_full;
use std::io::{self, Read};

/// Tracks which part of the two-half internal buffer of `ReplacingReader` was filled by the
/// most recent read from the underlying reader, and therefore which half is served next.
enum ReplacingReaderState {
    // The buffer has not been initialized yet: the first `read` call fills the whole
    // buffer and then switches to `LastReadIsMiddle`.
    NotInitialized,

    // Set after the initial whole-buffer fill and after every refill of the second half
    // (`buffer[old_pattern.len()..]`). In this state `read` serves the first half up to
    // `old_pattern.len()` and then refills the first half.
    // Original author's diagram: [4 5 6 7 0 1 2 3]
    // NOTE(review): the notation of this diagram could not be confirmed against the code.
    LastReadIsMiddle,

    // Set after every refill of the first half (`buffer[..old_pattern.len()]`). In this
    // state `read` serves the second half up to the end of the buffer and then refills
    // the second half.
    // Original author's diagram: [0 1 2 3 4 5 6 7]
    // NOTE(review): the notation of this diagram could not be confirmed against the code.
    LastReadIsStart,
}

/// `ReplacingReader` wraps an underlying reader and replaces occurrences of `old_pattern`
/// with `new_pattern` on the fly while the data is being read.
///
/// Occurrences of the pattern must not overlap; if they do, the behavior is undefined.
/// The internal buffer holds `2 * old_pattern.len()` bytes; callers can wrap the result in
/// `std::io::BufReader` if more buffering is required.
///
/// # Panics
///
/// `ReplacingReader::new` panics if `old.len() == 0`.
pub struct ReplacingReader<'a> {
    // source of the original, un-replaced bytes
    underlying_reader: &'a mut dyn Read,
    // buffer is separated into two halves and has a capacity of 2 * old_pattern.len();
    // the halves are refilled alternately, so a match may wrap from the end of the buffer
    // back to its start
    //
    // buffer:         X X X A | B C X X
    // next_match_ptr:       *
    // read_ptr:       *
    // when read_ptr reaches next_match_ptr, read() skips the old pattern and starts
    // feeding new_pattern to the caller instead
    buffer: Vec<u8>,
    // the byte sequence to search for (never empty)
    old_pattern: &'a [u8],
    // the byte sequence served in place of each match (may be empty)
    new_pattern: &'a [u8],
    // index in buffer of the next byte that has not been handed to the caller yet
    read_ptr: usize,

    // which part of the buffer was filled most recently
    state: ReplacingReaderState,

    // this is the location of eof in the buffer, if already met
    // the last byte should be buffer[eof_position - 1]
    eof_position: Option<usize>,

    // this is the location (index in buffer) of the next match, if present
    next_match_ptr: Option<usize>,

    // if this is Some, we are in progress of serving from new_pattern and the value is the
    // offset of the next byte of new_pattern to serve;
    // it goes back to None once the whole of new_pattern (new_pattern.len() bytes) was served
    serve_new_ptr: Option<usize>,
}

impl ReplacingReader<'_> {
    /// Creates a reader that yields the bytes of `r` with every occurrence of `old`
    /// replaced by `new`.
    ///
    /// The internal buffer is allocated with `2 * old.len()` zeroed bytes and nothing is
    /// read from `r` until the first `read` call.
    ///
    /// # Panics
    ///
    /// Panics if `old` is empty.
    pub fn new<'a>(r: &'a mut dyn Read, old: &'a [u8], new: &'a [u8]) -> ReplacingReader<'a> {
        assert!(!old.is_empty(), "old pattern can not be empty");

        ReplacingReader {
            underlying_reader: r,
            buffer: vec![0; 2 * old.len()],
            old_pattern: old,
            new_pattern: new,
            read_ptr: 0,
            state: ReplacingReaderState::NotInitialized,
            eof_position: None,
            next_match_ptr: None,
            serve_new_ptr: None,
        }
    }

    /// Returns `true` when the bytes of `buffer` starting at index `start` spell out
    /// `old_pattern`, treating `buffer` as a ring (the index after the last one is 0).
    #[inline(always)]
    fn try_match_from(&self, start: usize) -> bool {
        let ring_len = self.buffer.len();
        let mut cursor = start;
        for &expected in self.old_pattern {
            if self.buffer[cursor] != expected {
                return false;
            }
            // step to the next ring position, wrapping past the last index
            cursor += 1;
            if cursor == ring_len {
                cursor = 0;
            }
        }
        true
    }
}

impl Read for ReplacingReader<'_> {
    /// Reads bytes of the underlying stream into `buf`, with every occurrence of
    /// `old_pattern` replaced by `new_pattern`.
    ///
    /// A single call hands out at most one contiguous run of bytes (a slice of the internal
    /// buffer or a slice of `new_pattern`), so it may return fewer bytes than `buf.len()`
    /// even though more data is available. `Ok(0)` signals the end of the stream.
    ///
    /// # Errors
    ///
    /// Returns any error reported by `read_full` while (re)filling the internal buffer.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> {
        let buf_available = buf.len();
        let pattern_len = self.old_pattern.len();
        let buffer_len = self.buffer.len();

        // Every pass either returns bytes to the caller or changes the internal state and
        // tries again. A loop is used instead of self-recursion so that long runs of
        // back-to-back matches with an empty replacement cannot exhaust the stack.
        'retry: loop {
            // 1. A replacement is in progress: keep serving from new_pattern.
            if let Some(new_ptr) = self.serve_new_ptr {
                let remaining_new_pattern_len = self.new_pattern.len() - new_ptr;
                if remaining_new_pattern_len > buf_available {
                    buf.copy_from_slice(&self.new_pattern[new_ptr..new_ptr + buf_available]);
                    self.serve_new_ptr = Some(new_ptr + buf_available);
                    return Ok(buf_available);
                }
                // The rest of the replacement fits into buf (it may also be empty).
                self.serve_new_ptr = None;
                if remaining_new_pattern_len > 0 {
                    buf[..remaining_new_pattern_len].copy_from_slice(&self.new_pattern[new_ptr..]);
                    return Ok(remaining_new_pattern_len);
                }
            }

            // 2. A match is known: serve the bytes in front of it, then skip the old pattern.
            if let Some(next_match_ptr) = self.next_match_ptr {
                if next_match_ptr > self.read_ptr {
                    let remaining_buf_available = next_match_ptr - self.read_ptr;
                    if buf_available >= remaining_buf_available {
                        // we can read until start of match
                        buf[..remaining_buf_available]
                            .copy_from_slice(&self.buffer[self.read_ptr..next_match_ptr]);
                        self.serve_new_ptr = Some(0);
                        self.read_ptr = (next_match_ptr + pattern_len) % buffer_len;
                        self.next_match_ptr = None;
                        return Ok(remaining_buf_available);
                    }
                    buf.copy_from_slice(&self.buffer[self.read_ptr..self.read_ptr + buf_available]);
                    self.read_ptr += buf_available;
                    return Ok(buf_available);
                } else if next_match_ptr == self.read_ptr {
                    // We are standing on the match: jump over it and serve the replacement.
                    self.serve_new_ptr = Some(0);
                    self.read_ptr = (self.read_ptr + pattern_len) % buffer_len;
                    self.next_match_ptr = None;
                    continue 'retry;
                } else {
                    // The match lies behind the wrap-around: serve up to the end of the buffer.
                    let remaining_buf_available = buffer_len - self.read_ptr;
                    if buf_available >= remaining_buf_available {
                        buf[..remaining_buf_available]
                            .copy_from_slice(&self.buffer[self.read_ptr..]);
                        self.read_ptr = 0;
                        return Ok(remaining_buf_available);
                    }
                    buf.copy_from_slice(&self.buffer[self.read_ptr..self.read_ptr + buf_available]);
                    self.read_ptr += buf_available;
                    return Ok(buf_available);
                }
            }

            // 3. First call: fill the whole buffer.
            if let ReplacingReaderState::NotInitialized = self.state {
                let read_len = read_full(&mut self.buffer, self.underlying_reader)?;
                if read_len < buffer_len {
                    // we already hit eof
                    self.eof_position = Some(read_len);
                }
                if read_len >= pattern_len {
                    // The bound is deliberately exclusive: a match starting exactly at
                    // `possible_match_start` is left to the later passes (EOF scan or the
                    // first-half refill scan), which run once read_ptr has moved on.
                    let possible_match_start = read_len - pattern_len;
                    for guess_start in 0..possible_match_start {
                        if self.try_match_from(guess_start) {
                            self.next_match_ptr = Some(guess_start);
                            break;
                        }
                    }
                }
                self.state = ReplacingReaderState::LastReadIsMiddle;
                continue 'retry;
            }

            // 4. EOF was already seen: nothing will be refilled any more, so look for a match
            //    in the remaining bytes and otherwise serve them until the end.
            if let Some(eof_position) = self.eof_position {
                if eof_position == self.read_ptr {
                    return Ok(0);
                }
                // When eof lies behind the wrap-around, the contiguous unread data runs to
                // the end of the buffer; otherwise it runs to eof_position.
                let data_end = if eof_position < self.read_ptr {
                    buffer_len
                } else {
                    eof_position
                };
                let max_read_size = data_end - self.read_ptr;
                if max_read_size >= pattern_len {
                    let scan_end = self.read_ptr + 1 + max_read_size - pattern_len;
                    for guess_start in self.read_ptr..scan_end {
                        if self.try_match_from(guess_start) {
                            self.next_match_ptr = Some(guess_start);
                            continue 'retry;
                        }
                    }
                }
                if max_read_size > buf_available {
                    buf.copy_from_slice(&self.buffer[self.read_ptr..self.read_ptr + buf_available]);
                    self.read_ptr += buf_available;
                    return Ok(buf_available);
                }
                buf[..max_read_size].copy_from_slice(&self.buffer[self.read_ptr..data_end]);
                // Wraps to 0 when the data ran to the end of the buffer.
                self.read_ptr = data_end % buffer_len;
                return Ok(max_read_size);
            }

            // 5. General case: serve what is left of the older half, or refill the half that
            //    was fully consumed and search the other half for the next match.
            let wrap_pos = pattern_len;
            match self.state {
                ReplacingReaderState::LastReadIsStart => {
                    if self.read_ptr >= wrap_pos {
                        // the second half is the older one and still has unread bytes
                        let remaining_data_len = buffer_len - self.read_ptr;
                        if buf_available >= remaining_data_len {
                            buf[..remaining_data_len]
                                .copy_from_slice(&self.buffer[self.read_ptr..]);
                            self.read_ptr = 0;
                            return Ok(remaining_data_len);
                        }
                        buf.copy_from_slice(
                            &self.buffer[self.read_ptr..self.read_ptr + buf_available],
                        );
                        self.read_ptr += buf_available;
                        return Ok(buf_available);
                    }
                    // the second half is consumed: refill it
                    let size = read_full(&mut self.buffer[wrap_pos..], self.underlying_reader)?;
                    let mut last_possible_match_start = wrap_pos;
                    if size < pattern_len {
                        // eof is met, set eof position; a match starting exactly at
                        // `eof_position - pattern_len` is found by the EOF scan (step 4)
                        let eof_position = wrap_pos + size;
                        last_possible_match_start = eof_position - pattern_len;
                        self.eof_position = Some(eof_position);
                    }
                    for guess_start in self.read_ptr..last_possible_match_start {
                        if self.try_match_from(guess_start) {
                            self.next_match_ptr = Some(guess_start);
                            break;
                        }
                    }
                    self.state = ReplacingReaderState::LastReadIsMiddle;
                }
                ReplacingReaderState::LastReadIsMiddle => {
                    if self.read_ptr < wrap_pos {
                        // we still need to serve up to wrap_pos
                        let remaining_data_len = wrap_pos - self.read_ptr;
                        if buf_available >= remaining_data_len {
                            buf[..remaining_data_len]
                                .copy_from_slice(&self.buffer[self.read_ptr..wrap_pos]);
                            self.read_ptr = wrap_pos;
                            return Ok(remaining_data_len);
                        }
                        buf.copy_from_slice(
                            &self.buffer[self.read_ptr..self.read_ptr + buf_available],
                        );
                        self.read_ptr += buf_available;
                        return Ok(buf_available);
                    }
                    // the first half is consumed: refill it
                    let size = read_full(&mut self.buffer[..wrap_pos], self.underlying_reader)?;
                    let first_possible_match_start = self.read_ptr.max(wrap_pos);
                    // exclusive upper bound of the scan
                    let mut match_scan_end = buffer_len;
                    if size < pattern_len {
                        // eof is met. A match may start as late as
                        // `buffer_len - pattern_len + size`: it then wraps around and ends
                        // exactly on the last valid byte, `buffer[size - 1]`. That start must
                        // be included here (hence the `+ 1`), because the EOF scan in step 4
                        // only looks at matches that do not wrap.
                        self.eof_position = Some(size);
                        match_scan_end = buffer_len - pattern_len + size + 1;
                    }
                    for guess_start in first_possible_match_start..match_scan_end {
                        if self.try_match_from(guess_start) {
                            self.next_match_ptr = Some(guess_start);
                            break;
                        }
                    }
                    self.state = ReplacingReaderState::LastReadIsStart;
                }
                // step 3 always leaves NotInitialized before this point is reached
                ReplacingReaderState::NotInitialized => panic!("unknown state"),
            }
        }
    }
}

#[cfg(test)]
mod testconv {

    mod test_replacing_reader {
        use crate::conv::ReplacingReader;
        use std::io::Read;

        /// Pipes `input` through a `ReplacingReader` that replaces `old` with `new` and
        /// returns everything the reader yields.
        fn run_string_through(input: &str, old: &str, new: &str) -> String {
            let mut source = input.as_bytes();
            let mut reader = ReplacingReader::new(&mut source, old.as_bytes(), new.as_bytes());
            let mut output = String::new();
            reader.read_to_string(&mut output).unwrap();
            output
        }

        /// Inputs of every length from 0 to 39 made of repeated "ab", with a dangling "a"
        /// for odd lengths, which has to pass through unchanged.
        #[test]
        fn test_varying_input_len() {
            for input_len in 0..40 {
                let pairs = input_len / 2;
                let mut input = "ab".repeat(pairs);
                let mut expect = "cd".repeat(pairs);
                if input_len % 2 == 1 {
                    input.push('a');
                    expect.push('a');
                }

                assert_eq!(run_string_through(&input, "ab", "cd"), expect);
            }
        }

        /// The replacement is longer than the pattern.
        #[test]
        fn test_simple() {
            assert_eq!(
                run_string_through("abcabcabcabcabc", "ab", "cde"),
                "cdeccdeccdeccdeccdec"
            );
        }

        /// The replacement is empty, so matches are simply dropped.
        #[test]
        fn test_zero_new() {
            assert_eq!(run_string_through("abcabcabcabcabc", "ab", ""), "ccccc");
        }

        /// Inserts a pattern of 1 to 7 letters at two positions of a digit string, behind
        /// 0 to 4 underscores, and expects both occurrences to be replaced.
        #[test]
        fn test_insert_two_places() {
            let base_str = "012345678901234567890123456789";
            let replace_to = "test";

            for n_prefix in 0..5 {
                let prefix = "_".repeat(n_prefix);
                for insert_len in 1..8usize {
                    // "a", "ab", "abc", ... depending on insert_len
                    let insert_pattern: String =
                        (b'a'..).take(insert_len).map(char::from).collect();
                    for insert_pos_1 in 0..base_str.len() {
                        for insert_pos_2 in insert_pos_1 + 1..base_str.len() {
                            let head = &base_str[..insert_pos_1];
                            let middle = &base_str[insert_pos_1..insert_pos_2];
                            let tail = &base_str[insert_pos_2..];

                            let input_str = [
                                prefix.as_str(),
                                head,
                                insert_pattern.as_str(),
                                middle,
                                insert_pattern.as_str(),
                                tail,
                            ]
                            .concat();
                            let expect_str =
                                [prefix.as_str(), head, replace_to, middle, replace_to, tail]
                                    .concat();

                            assert_eq!(
                                run_string_through(&input_str, &insert_pattern, replace_to),
                                expect_str
                            );
                        }
                    }
                }
            }
        }
    }
}