temex 0.10.0

Regex-like temporal expressions for evaluating systems that change over time
Documentation
use crate::temex_error::TemexError;
use crate::temex_trace::TemexTrace;
use core::str::FromStr;
use std::fs::File;
use std::io::{BufRead, BufReader, Read};
use std::iter::Iterator;

/// Reads the CSV header line from `buffer` and returns its labels in input order.
///
/// All whitespace is removed from the header before it is split on commas, so
/// `"p1, p2"` yields `["p1", "p2"]`.
///
/// # Errors
///
/// * Any I/O error raised while reading the line (converted by `?`).
/// * `TemexError::Other` if a label is empty (which includes an empty header)
///   or if the same label appears more than once.
/// * `TemexError::UnexpectedCharacter` if a label does not start with an
///   alphabetic character, or contains a character that is neither
///   alphanumeric nor `_`.
fn get_labels<R: Read>(buffer: &mut BufReader<R>) -> Result<Vec<String>, TemexError> {
    let mut header_line = String::new();
    buffer.read_line(&mut header_line)?;
    // Strips interior spaces as well as the trailing line terminator.
    header_line.retain(|c| !c.is_whitespace());

    let labels: Vec<String> = header_line.split(',').map(str::to_owned).collect();

    {
        // Every label names exactly one column. A repeated label would make two
        // input columns resolve to the same output column, silently dropping data.
        let mut seen = std::collections::HashSet::with_capacity(labels.len());

        for label in &labels {
            let mut cs = label.chars();
            let first = cs.next().ok_or_else(|| {
                TemexError::Other("Improperly formatted label in trace".to_owned())
            })?;
            if !first.is_alphabetic() {
                return Err(TemexError::UnexpectedCharacter(first));
            }
            if let Some(bad) = cs.find(|c| !c.is_alphanumeric() && *c != '_') {
                return Err(TemexError::UnexpectedCharacter(bad));
            }
            if !seen.insert(label.as_str()) {
                return Err(TemexError::Other(format!(
                    "Duplicate label in trace: {}",
                    label
                )));
            }
        }
    }

    Ok(labels)
}

/// Builds the column permutation from input order to sorted order.
///
/// A `TemexTrace` keeps its labels in lexicographic order, while the input may
/// list them in any order. Element `i` of the returned vector is the position in
/// `sorted_labels` of `input_labels[i]`, i.e. the output column that the value
/// found in input column `i` must be written to.
///
/// # Panics
///
/// Panics if a label from `input_labels` is not present in `sorted_labels`.
fn get_idx_mapping(input_labels: &[String], sorted_labels: &[String]) -> Vec<usize> {
    input_labels
        .iter()
        .map(|wanted| {
            sorted_labels
                .iter()
                .position(|candidate| candidate == wanted)
                .unwrap()
        })
        .collect()
}

/// Converts one data line of the input into a trace element.
///
/// Commas and whitespace in `line` are discarded; every remaining character must
/// be `'0'` or `'1'`. The value at input position `i` is written to output
/// position `input_idx_to_sorted[i]`, and the resulting row is terminated with
/// `'\n'`, which is the delimiter between trace elements.
///
/// # Errors
///
/// * `TemexError::UnexpectedCharacter` for the first character that is not a
///   comma, whitespace, `'0'` or `'1'`.
/// * `TemexError::Other` if the number of values differs from the number of
///   entries in `input_idx_to_sorted`.
///
/// # Panics
///
/// Panics if an entry of `input_idx_to_sorted` is not a valid index into the row.
fn get_trace_element(line: String, input_idx_to_sorted: &[usize]) -> Result<String, TemexError> {
    // Values in their original (input) column order.
    let original_row = line
        .chars()
        .filter(|c| *c != ',' && !c.is_whitespace())
        .map(|c| match c {
            '0' | '1' => Ok(c),
            other => Err(TemexError::UnexpectedCharacter(other)),
        })
        .collect::<Result<Vec<char>, TemexError>>()?;

    // Exactly one value per label is required.
    if original_row.len() != input_idx_to_sorted.len() {
        return Err(TemexError::Other(
            "Length of data row not equal to length of label row".to_owned(),
        ));
    }

    // Scatter each value into the column its label occupies once the labels
    // are sorted lexicographically.
    let mut row = vec!['0'; original_row.len()];
    for (&value, &sorted_idx) in original_row.iter().zip(input_idx_to_sorted) {
        row[sorted_idx] = value;
    }

    let mut element = String::with_capacity(row.len() + 1);
    element.extend(row);
    // Newlines are used to delimit trace elements.
    element.push('\n');

    Ok(element)
}

/// Reads a CSV-style trace from `source` and builds a `TemexTrace` from it.
///
/// The first line supplies the labels; every following line is converted to a
/// trace element whose values are reordered to match the lexicographically
/// sorted labels. The sorted labels and the concatenated trace elements (as
/// bytes) make up the returned `TemexTrace`.
///
/// # Errors
///
/// Returns the first error reported by `get_labels`, by reading a line from
/// `source`, or by `get_trace_element`.
fn read_try_from<R: Read>(source: R) -> Result<TemexTrace, TemexError> {
    let mut buffer = BufReader::new(source);
    let labels = get_labels(&mut buffer)?;

    // Keep the input order around: it is needed to compute the permutation.
    let mut sorted_labels = labels.clone();
    sorted_labels.sort_unstable();
    let input_idx_to_sorted = get_idx_mapping(&labels, &sorted_labels);

    let mut trace = String::new();
    for line_result in buffer.lines() {
        let trace_element = get_trace_element(line_result?, &input_idx_to_sorted)?;
        trace.push_str(&trace_element);
    }

    Ok(TemexTrace {
        labels: sorted_labels,
        // Hand the String's buffer over directly instead of copying it.
        data: trace.into_bytes(),
    })
}

// Due to current limitations in the Rust compiler it is impossible to implement
// TryFrom generically over the Read trait for TemexTrace, so here are some explicit implementations.
/// Builds a `TemexTrace` by reading from a borrowed [`File`].
impl TryFrom<&File> for TemexTrace {
    type Error = TemexError;

    /// Delegates to `read_try_from`, using `source` as the reader.
    fn try_from(source: &File) -> Result<TemexTrace, TemexError> {
        let reader = source;
        read_try_from(reader)
    }
}

/// Builds a `TemexTrace` by reading from an owned [`File`], which is consumed.
impl TryFrom<File> for TemexTrace {
    type Error = TemexError;

    /// Delegates to `read_try_from`, using `source` as the reader.
    fn try_from(source: File) -> Result<TemexTrace, TemexError> {
        let reader = source;
        read_try_from(reader)
    }
}

/// Builds a `TemexTrace` from an in-memory byte slice.
impl TryFrom<&[u8]> for TemexTrace {
    type Error = TemexError;

    /// Delegates to `read_try_from`, using the byte slice as the reader.
    fn try_from(source: &[u8]) -> Result<TemexTrace, TemexError> {
        let reader = source;
        read_try_from(reader)
    }
}

/// Builds a `TemexTrace` by reading from a borrowed [`std::net::TcpStream`].
impl TryFrom<&std::net::TcpStream> for TemexTrace {
    type Error = TemexError;

    /// Delegates to `read_try_from`, using `source` as the reader.
    fn try_from(source: &std::net::TcpStream) -> Result<TemexTrace, TemexError> {
        let reader = source;
        read_try_from(reader)
    }
}

/// Builds a `TemexTrace` by reading from an owned [`std::net::TcpStream`],
/// which is consumed.
impl TryFrom<std::net::TcpStream> for TemexTrace {
    type Error = TemexError;

    /// Delegates to `read_try_from`, using `source` as the reader.
    fn try_from(source: std::net::TcpStream) -> Result<TemexTrace, TemexError> {
        let reader = source;
        read_try_from(reader)
    }
}

/// Builds a `TemexTrace` from the text of a string slice.
impl TryFrom<&str> for TemexTrace {
    type Error = TemexError;

    /// Delegates to `read_try_from`, reading the string's bytes.
    fn try_from(source: &str) -> Result<TemexTrace, TemexError> {
        let bytes = source.as_bytes();
        read_try_from(bytes)
    }
}

impl FromStr for TemexTrace {
    type Err = TemexError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        TemexTrace::try_from(s.as_bytes())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Three labels followed by three rows of three values; includes stray
    // spaces to exercise whitespace stripping.
    const TRACE1: &[u8] = b"p1, p2, p3\n0,0,0\n1,1,1\n0, 0,0\n";
    // Four comma-separated header fields, but only three values per row.
    const TRACE2: &[u8] = b"wakka wakka, p1, p2, p3\n0,0,0\n1,1,1\n0, 0,0\n";

    /// A well-formed trace yields its labels and one newline-terminated
    /// element per data row.
    #[test]
    fn csv_to_normal_form_works() {
        let tnf = TemexTrace::try_from(TRACE1).unwrap();

        assert_eq!(
            tnf.labels,
            vec!["p1".to_string(), "p2".to_string(), "p3".to_string()]
        );
        assert_eq!(tnf.data, "000\n111\n000\n".as_bytes());
    }

    /// Rows with fewer values than there are labels are rejected.
    // Asserting on the `Err` directly avoids depending on the wording of the
    // panic message produced by `Result::unwrap`.
    #[test]
    fn missing_values_column() {
        assert!(TemexTrace::try_from(TRACE2).is_err());
    }
}