commons_io/
split.rs

//! # Split read
//!
//! Splits a [`Read`](std::io::Read) stream into consecutive parts delimited by a
//! binary separator. Each part is itself readable and ends where the next
//! separator (or the end of the stream) begins.
//!
//! # Example
//! ```
//!    use std::fs::File;
//!    use std::fs;
//!    use commons_io::split::Split;
//!    use std::io::{Read, Write};
//!    let mut fw = File::create("split_text.txt").unwrap();
//!    fw.write_all("sdfsdfgtdbp_trans_log.id78989ll,tdbp_trans_log.ord_id".as_bytes());
//!    let fr = File::open("split_text.txt").unwrap();
//!    let separator = vec![116, 100, 098, 112, 095, 116, 114, 097, 110, 115]; // b"tdbp_trans"
//!    let mut split = Split::new(fr, &separator);
//!    let mut match_parts = vec!["_log.ord_id", "_log.id78989ll,", "sdfsdfg"];
//!    loop {
//!        match split.next_part() {
//!            None => {
//!                break;
//!            }
//!            Some(mut part) => {
//!                let mut s = String::new();
//!                part.read_to_string(&mut s);
//!                assert_eq!(match_parts.pop().unwrap(), s);
//!            }
//!        }
//!    }
//!    fs::remove_file("split_text.txt");
//! ```
31
32
33use std::io::{Read, Write};
34use FocusStat::{EMPTY, MatchOffset, EOF};
35use std::cmp;
36use commons_lang::array::array_equal;
37
38pub struct Split<'separator, T: Read>
39{
40    origin: T,
41    focus: Vec<u8>,
42    buf: Box<[u8]>,
43    focus_stat: FocusStat,
44    separator: &'separator Vec<u8>,
45}
46
/// State of the `focus` window of a `Split`.
enum FocusStat {
    /// The window has to be (re)filled from the reader before it can be
    /// matched against the separator.
    EMPTY,

    /// The first `n` bytes of the window belong to the current part.
    /// `MatchOffset(0)` means the window holds the separator, i.e. the current
    /// part has ended.
    MatchOffset(usize),

    /// The reader returned no more data and the window is empty; there are no
    /// further parts.
    EOF,
}
52
53
54impl<'separator, T: Read> Split<'separator, T> {
55
56    pub fn new(origin: T, separator: &'separator Vec<u8>) -> Split<T>
57    {
58        Split {
59            origin,
60            focus: Vec::with_capacity(separator.len()),
61            buf: vec![0; separator.len()].into_boxed_slice(),
62            separator,
63            focus_stat: EMPTY,
64        }
65    }
66
67    pub fn next_part(&mut self) -> Option<Part<'_, 'separator, T>> {
68        match self.focus_stat {
69            EOF => None,
70            _ => {
71                self.focus_stat = EMPTY;
72                self.focus.clear();
73                return Some(Part::new(self));
74            }
75        }
76    }
77
78    pub fn into_origin(self) -> T{
79        self.origin
80    }
81
82    fn load_to_focus(&mut self) {
83        //focus读满
84        loop {
85            let buf_size = cmp::min(self.separator.len() - self.focus.len(), self.buf.len());
86            if let Ok(size) = self.origin.read(&mut self.buf[..buf_size]) {
87                if size == 0 {
88                    break;
89                } else {
90                    self.focus.write_all(&mut self.buf[..size]).unwrap();
91                }
92            }
93        }
94
95        //到达终点
96        if self.focus.len() == 0 {
97            self.focus_stat = EOF;
98        } else {
99            if self.focus.len() < self.separator.len(){
100                // focus中的内容少于separator, 表示读到最后的少量数据, 必定不是separator, 无需匹配
101                self.focus_stat = MatchOffset(self.focus.len());
102            } else {
103                //从头部开始匹配
104                self.focus_stat = MatchOffset(self.match_offset());
105            }
106        }
107    }
108
109    fn match_offset(&self) -> usize {
110        let mut fi = 0;
111        loop {
112            if array_equal(&self.focus[fi..], &self.separator[0..self.focus.len() - fi]) {
113                return fi;
114            } else {
115                fi += 1;
116                if fi == self.focus.len() {
117                    return fi;
118                }
119            }
120        }
121    }
122
123    fn read_from_focus(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
124        match self.focus_stat {
125            EMPTY | EOF => Ok(0),
126            MatchOffset(offset) => {
127                if offset == 0 {
128                    return Ok(0);
129                }
130
131                //只读取offset之前的数据
132                let buf_size_min = cmp::min(offset, buf.len());
133                buf[..buf_size_min].copy_from_slice(&self.focus[..buf_size_min]);
134                self.focus.drain(..buf_size_min);
135
136                if self.focus.len() == 0 {
137                    self.focus_stat = EMPTY;
138                } else {
139                    // 可能是separator的头部
140                    // 将focus读满,并检查匹配
141                    self.load_to_focus();
142                }
143                Ok(buf_size_min)
144            }
145        }
146    }
147
148    fn exhaust_part(&mut self) {
149
150        //offset为0, 表示focus中内容为separator, 即part已耗尽
151        if let MatchOffset(offset) = self.focus_stat {
152            if offset == 0 {
153                return;
154            }
155        }
156
157        let mut buf = vec![0; self.separator.len()].into_boxed_slice();
158
159        loop {
160            if let EMPTY = self.focus_stat {
161                self.load_to_focus();
162            }
163
164            let result = match self.focus_stat {
165                EMPTY | EOF => Ok(0),
166                MatchOffset(_) => self.read_from_focus(&mut buf)
167            };
168
169            match result {
170                Ok(size) if size == 0 => break,
171                _ => continue
172            }
173        }
174    }
175}
176
177
178pub struct Part<'split, 'separator, T: Read> {
179    split: &'split mut Split<'separator, T>
180}
181
182impl<'split, 'separator, T: Read> Part<'split, 'separator, T> {
183    pub fn new(split: &'split mut Split<'separator, T>) -> Part<'split, 'separator, T>
184    {
185        Part {
186            split
187        }
188    }
189}
190
191impl<T: Read> Read for Part<'_, '_, T> {
192    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
193        if let EMPTY = self.split.focus_stat {
194            self.split.load_to_focus();
195        }
196
197        match self.split.focus_stat {
198            EMPTY | EOF => Ok(0),
199            MatchOffset(_) => self.split.read_from_focus(buf)
200        }
201    }
202}
203
204impl<T: Read> Drop for Part<'_, '_, T>
205{
206    fn drop(&mut self) {
207        self.split.exhaust_part();
208    }
209}
210