commons-io 0.1.4

rust lang common io utils
Documentation
//! # Split read
//!
//! Splits a reader into parts delimited by a binary separator.
//!
//! # Example
//! ```
//!    use std::fs::File;
//!    use std::fs;
//!    use commons_io::split::Split;
//!    use std::io::{Read, Write};
//!    let mut fw = File::create("split_text.txt").unwrap();
//!    fw.write_all("sdfsdfgtdbp_trans_log.id78989ll,tdbp_trans_log.ord_id".as_bytes());
//!    let fr = File::open("split_text.txt").unwrap();
//!    let separator = vec![116, 100, 098, 112, 095, 116, 114, 097, 110, 115];
//!    let mut split = Split::new(fr, &separator);
//!    let mut match_parts = vec!["_log.ord_id", "_log.id78989ll,", "sdfsdfg"];
//!    loop {
//!        match split.next_part() {
//!            None => {
//!                break;
//!            }
//!            Some(mut part) => {
//!                let mut s = String::new();
//!                part.read_to_string(&mut s);
//!                assert_eq!(match_parts.pop().unwrap(), s);
//!            }
//!        }
//!    }
//!    fs::remove_file("split_text.txt");
//! ```


use std::io::{Read, Write};
use FocusStat::{EMPTY, MatchOffset, EOF};
use std::cmp;
use commons_lang::array::array_equal;

/// Splits the bytes of a reader into consecutive parts delimited by a binary
/// separator.
///
/// Parts are handed out one at a time by [`Split::next_part`]. Each [`Part`]
/// implements [`Read`] and yields the bytes up to, but not including, the next
/// occurrence of the separator (or the end of the input).
///
/// The origin is read in chunks of at most `separator.len()` bytes, so a slow
/// source benefits from being wrapped in a buffered reader first.
///
/// An empty separator never matches: the whole input is delivered as a single
/// part.
pub struct Split<'separator, T: Read>
{
    /// Reader the parts are carved out of.
    origin: T,
    /// Look-ahead window holding at most one separator's worth of unread bytes.
    focus: Vec<u8>,
    /// Scratch buffer used to move bytes from `origin` into `focus`; its length
    /// is the window size.
    buf: Box<[u8]>,
    /// What is currently known about the contents of `focus`.
    focus_stat: FocusStat,
    /// Byte sequence that delimits the parts.
    separator: &'separator Vec<u8>,
}

/// State of the look-ahead window of a [`Split`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FocusStat {
    /// The window must be (re)filled and matched before it can be read.
    EMPTY,
    /// The first `n` bytes of the window are part data; the bytes after them
    /// are a (possibly partial) separator match. `MatchOffset(0)` means the
    /// window holds the separator, i.e. the current part has ended.
    MatchOffset(usize),
    /// The origin is exhausted and no buffered bytes remain.
    EOF,
}


impl<'separator, T: Read> Split<'separator, T> {

    /// Creates a splitter that cuts the bytes of `origin` at every occurrence
    /// of `separator`.
    pub fn new(origin: T, separator: &'separator Vec<u8>) -> Self
    {
        // The window is never narrower than one byte, so an empty separator
        // lets the data through instead of reporting an immediate EOF.
        let window = cmp::max(separator.len(), 1);
        Split {
            origin,
            focus: Vec::with_capacity(window),
            buf: vec![0; window].into_boxed_slice(),
            separator,
            focus_stat: EMPTY,
        }
    }

    /// Returns the next part of the input, or `None` once the input has been
    /// consumed completely.
    ///
    /// The previous part (if any) has already skipped its unread bytes when it
    /// was dropped, so the returned part starts right after the separator.
    /// Input that ends with a separator yields a final, empty part.
    pub fn next_part(&mut self) -> Option<Part<'_, 'separator, T>> {
        if let EOF = self.focus_stat {
            return None;
        }
        // Discard the separator left in the window by the previous part.
        self.focus_stat = EMPTY;
        self.focus.clear();
        Some(Part::new(self))
    }

    /// Consumes the splitter and returns the underlying reader.
    pub fn into_origin(self) -> T {
        self.origin
    }

    /// Fills the window from the origin and records where a separator match
    /// may start.
    ///
    /// Reads interrupted by `ErrorKind::Interrupted` are retried. Any other
    /// error is returned; the state is left untouched and the bytes gathered
    /// so far stay in the window, so the call can simply be repeated.
    fn load_to_focus(&mut self) -> std::io::Result<()> {
        let window = self.buf.len();

        // Fill the window completely, or up to the end of the input.
        while self.focus.len() < window {
            let want = window - self.focus.len();
            match self.origin.read(&mut self.buf[..want]) {
                Ok(0) => break,
                Ok(size) => self.focus.extend_from_slice(&self.buf[..size]),
                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }

        self.focus_stat = if self.focus.is_empty() {
            // Nothing buffered and nothing left to read.
            EOF
        } else if self.focus.len() < window {
            // A short window is the tail of the input: it cannot contain the
            // separator, so all of it is part data and no matching is needed.
            MatchOffset(self.focus.len())
        } else {
            // Match from the start of the window.
            MatchOffset(self.match_offset())
        };
        Ok(())
    }

    /// Returns the first offset in the window from which the remaining bytes
    /// are a prefix of the separator, or the window length if there is none.
    ///
    /// An offset of `0` on a full window means the window equals the separator.
    fn match_offset(&self) -> usize {
        (0..self.focus.len())
            .find(|&start| self.separator.starts_with(&self.focus[start..]))
            .unwrap_or(self.focus.len())
    }

    /// Moves part data from the front of the window into `buf` and returns the
    /// number of bytes copied.
    ///
    /// Only the bytes in front of the match offset are handed out. Returns `0`
    /// when the window is unloaded, the input is exhausted, the part has ended
    /// or `buf` is empty.
    fn read_from_focus(&mut self, buf: &mut [u8]) -> usize {
        let offset = match self.focus_stat {
            MatchOffset(offset) => offset,
            EMPTY | EOF => return 0,
        };

        let count = cmp::min(offset, buf.len());
        if count == 0 {
            return 0;
        }

        buf[..count].copy_from_slice(&self.focus[..count]);
        self.focus.drain(..count);

        // The remaining bytes may be the head of a separator. Refilling and
        // re-matching is deferred to the next read so that an I/O error can
        // never follow bytes that were already handed to the caller.
        self.focus_stat = EMPTY;
        count
    }

    /// Skips whatever is left of the current part, leaving the window either
    /// on the separator or at the end of the input.
    ///
    /// This runs when a part is dropped, where an error cannot be reported: on
    /// a read failure skipping simply stops. A persistent failure shows up
    /// again on the next read.
    fn exhaust_part(&mut self) {
        loop {
            if let EMPTY = self.focus_stat {
                if self.load_to_focus().is_err() {
                    return;
                }
            }

            match self.focus_stat {
                MatchOffset(offset) if offset > 0 => {
                    // Throw away the part data in front of the match.
                    self.focus.drain(..offset);
                    self.focus_stat = EMPTY;
                }
                // An offset of 0 means the window holds the separator, so the
                // part is exhausted; EOF means there is nothing left at all.
                EMPTY | EOF | MatchOffset(_) => return,
            }
        }
    }
}


/// One separator-delimited section of the input of a [`Split`].
///
/// Reading yields the bytes of this section only. Dropping the part skips the
/// bytes that were not read, so the next part starts after the separator.
pub struct Part<'split, 'separator, T: Read> {
    /// Splitter this part reads from.
    split: &'split mut Split<'separator, T>
}

impl<'split, 'separator, T: Read> Part<'split, 'separator, T> {
    /// Creates a part that reads from the current position of `split`.
    pub fn new(split: &'split mut Split<'separator, T>) -> Self
    {
        Part {
            split
        }
    }
}

impl<T: Read> Read for Part<'_, '_, T> {
    /// Reads bytes of this part into `buf`; `Ok(0)` marks the end of the part.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the underlying reader other than
    /// `ErrorKind::Interrupted`, which is retried.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        if let EMPTY = self.split.focus_stat {
            self.split.load_to_focus()?;
        }
        Ok(self.split.read_from_focus(buf))
    }
}

impl<T: Read> Drop for Part<'_, '_, T>
{
    /// Skips the unread remainder of this part.
    fn drop(&mut self) {
        self.split.exhaust_part();
    }
}