csplit 0.1.0

A clone of the Unix coreutils `csplit` utility.
use super::{Error, Result};
use regex::Regex;
#[cfg(test)]
mod tests;

#[derive(Debug, Clone)]
/// A pattern to match to mark the end of the current file
pub enum Pattern {
    /// Create a file containing the input from the current line to (but not
    /// including) the next line matching the given basic regular
    /// expression.  An optional offset from the line that matched may
    /// be specified. If `skip`, do not create the file
    Match {
        /// Regular expression tested against each line by `find`.
        re: Regex,
        /// Signed number of lines added to the matching line's number to
        /// obtain the line at which the section ends.
        offset: i64,
        /// When `true`, `find` reports that the section must not be written.
        skip: bool,
        /// Repeat count taken from the `{n}` argument(s) following this
        /// pattern; `build` stores 1 when no `{n}` was given.
        repeat: usize,
    },
    /// Create a file containing the input from the current line to (but not
    /// including) the specified line number.
    UntilLine(usize),
    /// Create a file once the current line is exactly `n` lines past the
    /// start of the current section. `build` produces this variant (instead
    /// of `UntilLine`) when a line-count argument is followed by `{n}`.
    NLines {
        /// Number of lines between the section start and the section end.
        n: usize,
        /// The `{n}` repeat count minus one, as stored by `build`.
        repeat: usize,
    },
}

impl Pattern {
    /// Test whether this pattern ends the current section at `current_line`.
    ///
    /// * `start_line` - line number the current section is measured from.
    /// * `current_line` - line number of the line being examined.
    /// * `s` - text of the line being examined (only used by `Match`).
    ///
    /// Returns `Some((end, write))` when the section ends, where `end` is the
    /// line number at which it ends and `write` is `false` only for a `Match`
    /// pattern whose `skip` flag is set. Returns `None` when the pattern does
    /// not apply to this line.
    ///
    /// For `Match`, `end` is `current_line + offset` clamped at zero, and the
    /// match is ignored unless `end` lies strictly after `start_line`.
    pub fn find(&self, start_line: usize, current_line: usize, s: &str) -> Option<(usize, bool)> {
        match self {
            Pattern::Match { re, offset, skip, .. } if re.is_match(s) => {
                // Saturate rather than overflow on extreme offsets, then
                // clamp negative results to line 0.
                let end = (current_line as i64).saturating_add(*offset).max(0) as usize;
                if end > start_line {
                    Some((end, !*skip))
                } else {
                    None
                }
            },
            Pattern::UntilLine(line) if current_line == *line => Some((current_line, true)),
            // `checked_sub` yields `None` when `current_line < start_line`,
            // so that case is "no match" instead of an arithmetic underflow.
            Pattern::NLines { n, .. } if current_line.checked_sub(start_line) == Some(*n) => {
                Some((current_line, true))
            },
            _ => None,
        }
    }
}
#[derive(Debug, Clone)]
/// One parsed command-line argument, before `{n}` repeat counts have been
/// folded into the pattern they apply to.
enum Builder {
    /// A regular-expression argument: `/REGEX/[OFFSET]` gives `skip == false`
    /// and `%REGEX%[OFFSET]` gives `skip == true`. `offset` is the optional
    /// signed line offset written after the closing delimiter; `Builder::new`
    /// stores 1 when none is written.
    Regex { re: regex::Regex, offset: i64, skip: bool },
    /// A bare, non-zero unsigned integer argument. `build` turns it into
    /// either a line-number pattern or, when followed by `{n}`, a
    /// fixed-line-count pattern.
    Lines(usize),
    /// A `{n}` argument: a repeat count applied to the pattern argument that
    /// precedes it.
    Repeat(usize),
}

impl Builder {
    pub fn new(arg: &str) -> Result<Self> {
        let arg = arg.as_ref();
        if let Some((pattern, offset)) = Self::write(arg) {
            Ok(Builder::Regex {
                re: Regex::new(pattern)?,
                offset,
                skip: false,
            })
        } else if let Some((pattern, offset)) = Self::seek(arg) {
            Ok(Builder::Regex {
                re: Regex::new(pattern)?,
                offset,
                skip: true,
            })
        } else {
            match Self::line(arg) {
                Some(0) | None => {},
                Some(n) => return Ok(Builder::Lines(n)),
            }

            match Self::repeat(arg) {
                Some(0) | None => Err(Error::NoPattern(arg.to_owned())),
                Some(n) => Ok(Builder::Repeat(n)),
            }
        }
    }

    fn write(s: &str) -> Option<(&str, i64)> {
        lazy_static! {
            static ref RE: Regex = Regex::new(concat!(r"^/(.+)/", r"([+-]?[0-9]+)?$")).unwrap();
        }
        if let Some(caps) = RE.captures(s) {
            match (caps.get(1), caps.get(2)) {
                (Some(re), Some(n)) => Some((re.as_str(), n.as_str().parse().unwrap())),
                // unwrap is OK since the regex already makes sure it's a valid integer
                (Some(re), None) => Some((re.as_str(), 1)),
                _ => None,
            }
        } else {
            None
        }
    }

    fn seek(s: &str) -> Option<(&str, i64)> {
        lazy_static! {
            static ref RE: Regex = Regex::new(concat!(r"^%(.+)%", r"([+-]?[0-9]+)?$")).unwrap();
        }
        if let Some(caps) = RE.captures(s) {
            match (caps.get(1), caps.get(2)) {
                (Some(re), Some(n)) => Some((re.as_str(), n.as_str().parse().unwrap())),
                // unwrap is OK since the regex already makes sure it's a valid integer
                (Some(re), None) => Some((re.as_str(), 1)),
                _ => None,
            }
        } else {
            None
        }
    }

    fn line(s: &str) -> Option<usize> { s.parse::<usize>().ok() }

    fn repeat(s: &str) -> Option<usize> {
        if s.starts_with('{') && s.ends_with('}') {
            s[1..s.len() - 1].parse::<usize>().ok()
        } else {
            None
        }
    }
}

pub fn build_from_args(args: &Vec<std::ffi::OsString>) -> Result<Vec<(Pattern)>> {
    let args: Result<Vec<&str>> = args
        .into_iter()
        .map(|arg| match arg.to_str() {
            Some(s) => Ok(s),
            None => Err(Error::BadString(arg.to_owned())),
        })
        .collect();
    build(args?)
}
fn build(args: Vec<&str>) -> Result<Vec<Pattern>> {
    let mut patterns = Vec::with_capacity(args.len());
    let mut repeat: Option<usize> = None;

    for (i, arg) in (args.into_iter().enumerate()).rev() {
        let pattern = match Builder::new(arg.as_ref())? {
            Builder::Repeat(_) if i == 0 => {
                return Err(Error::BadBuilder(
                    "can't have a repeat pattern '{n}' as the first pattern.",
                ))
            },
            Builder::Repeat(0) => {
                return Err(Error::BadBuilder(
                    "repeat pattern '{0}' is invalid; can't repeat something zero times",
                ))
            },
            Builder::Lines(0) => {
                return Err(Error::BadBuilder(
                    "lines pattern '0' is invalid; must be positive integer",
                ))
            },
            Builder::Repeat(n) => {
                repeat = Some(if let Some(m) = repeat { m * n } else { n });
                continue;
            },
            Builder::Regex { re, offset, skip } => Pattern::Match {
                re,
                offset,
                skip,
                repeat: if let Some(r) = repeat { r } else { 1 },
            },
            Builder::Lines(n) if repeat.is_some() => Pattern::NLines {
                n,
                repeat: repeat.unwrap() - 1,
            },
            Builder::Lines(n) => Pattern::UntilLine(n),
        };
        patterns.push(pattern);
        repeat = None;
    }
    Ok(patterns)
}

impl PartialEq for Pattern {
    fn eq(&self, rhs: &Self) -> bool {
        use self::Pattern::*;
        match (self, rhs) {
            (
                Match {
                    re: a0,
                    offset: b0,
                    skip: c0,
                    repeat: d0,
                },
                Match {
                    re: a1,
                    offset: b1,
                    skip: c1,
                    repeat: d1,
                },
            ) => d1 == d0 && c0 == c1 && b0 == b1 && a0.as_str() == a1.as_str(),
            (UntilLine(n), UntilLine(m)) => n == m,
            (NLines { n: n0, repeat: r0 }, NLines { n: n1, repeat: r1 }) => n0 == n1 && r0 == r1,
            _ => false,
        }
    }
}

impl PartialEq for Builder {
    fn eq(&self, rhs: &Self) -> bool {
        use self::Builder::*;
        match (self, rhs) {
            (Repeat(n), Repeat(m)) => m == n,
            (
                Regex {
                    re: a,
                    offset: i,
                    skip: x,
                },
                Regex {
                    re: b,
                    offset: j,
                    skip: y,
                },
            ) => y == x && i == j && a.as_str() == b.as_str(),
            (Lines(n), Lines(m)) => m == n,
            _ => false,
        }
    }
}