extern crate flate2;
extern crate libc;
extern crate regex;
use std::fs::File;
use std::io::{Read, BufReader, BufRead, Stdin, stdin};
use self::flate2::read::GzDecoder;
use self::regex::Regex;
use std::iter::Skip;
use std::slice::Iter;
use std::fmt;
use std::str::FromStr;
use std::{io as stdio};
use vectors::copy_memory;
use matrix::Matrix;
/// Renders a sequence of displayable values as one delimiter-separated line.
pub trait CsvString {
    /// Returns the elements of `self` formatted with `Display` and joined
    /// by `delim`. No delimiter is written before the first or after the
    /// last element; an empty sequence yields an empty string.
    fn to_csv(&self, delim: &str) -> String;
}

impl<T: fmt::Display> CsvString for Vec<T> {
    /// Delegates to the slice implementation.
    fn to_csv(&self, delim: &str) -> String {
        <[T] as CsvString>::to_csv(self, delim)
    }
}

impl<T: fmt::Display> CsvString for [T] {
    /// Joins the `Display` form of every element with `delim`.
    fn to_csv(&self, delim: &str) -> String {
        let mut line = String::new();
        for (pos, item) in self.iter().enumerate() {
            // Every element except the first is preceded by the delimiter.
            if pos != 0 {
                line.push_str(delim);
            }
            line.push_str(&item.to_string());
        }
        line
    }
}
impl<T: fmt::Display + Clone> CsvString for Matrix<T> {
    /// Writes one line per matrix row: the row's values joined by `delim`,
    /// each line terminated by `'\n'` (including the last one).
    fn to_csv(&self, delim: &str) -> String {
        let mut text = String::new();
        for row in self.row_iter() {
            text.push_str(&row.to_csv(delim));
            text.push('\n');
        }
        text
    }
}
pub struct GzipData {
v: Vec<u8>,
idx: usize
}
impl <'b> GzipData {
pub fn from_file(fname: &str) -> Result<GzipData, &'static str> {
let mut r: Vec<u8> = Vec::new();
try!(
try!(GzDecoder::new(
try!(File::open(fname)
.map_err(|_| "Could not open file")
)
)
.map_err(|_| "Invalid gzip header.")
)
.read_to_end(&mut r)
.map_err(|_| "Could not unzip data.")
);
Ok(GzipData {
v: r,
idx: 0
})
}
pub fn from_buf(v: Vec<u8>) -> GzipData {
GzipData {
v: v,
idx: 0
}
}
pub fn into_bytes(&self) -> Vec<u8> { self.v.clone() }
pub fn len(&self) -> usize { self.v.len() }
pub fn iter(&self) -> Skip<Iter<u8>> { self.v.iter().skip(self.idx) }
pub fn buf(&'b self) -> &'b [u8] { &self.v.split_at(self.idx).1 }
}
impl Read for GzipData {
    /// Copies bytes from the current read position into `buf` and advances
    /// the position by the count reported by `copy_memory`.
    ///
    /// Returns `Ok(0)` once the position has reached the end of the data.
    fn read(&mut self, buf: &mut [u8]) -> stdio::Result<usize> {
        let copied = if self.idx < self.v.len() {
            let wanted = buf.len();
            // NOTE(review): `copy_memory` is a project helper; this assumes it
            // returns the number of bytes actually copied and copes with a
            // source shorter than `wanted` -- confirm against `vectors`.
            copy_memory(buf, &self.v[self.idx..], wanted)
        } else {
            0
        };
        self.idx += copied;
        Ok(copied)
    }
}
/// Iterator over the lines of a reader that match a regular expression.
///
/// Lines that do not match are skipped. For every matching line the iterator
/// yields the text of all capture groups (group 0, the whole match, first).
pub struct MatchLines<R: Read> {
    /// Buffered source the lines are read from.
    reader: BufReader<R>,
    /// Expression every line is tested against.
    r: Regex,
}

impl<R: Read> Iterator for MatchLines<R> {
    type Item = stdio::Result<Vec<String>>;

    /// Returns the capture groups of the next matching line, `Some(Err(_))`
    /// if reading fails, or `None` at end of input.
    ///
    /// A capture group that did not participate in the match (for example
    /// one side of an alternation or an optional group) is reported as an
    /// empty string rather than causing a panic.
    fn next(&mut self) -> Option<Self::Item> {
        // One buffer is reused for all the lines skipped during this call.
        let mut buf = String::new();
        loop {
            buf.clear();
            match self.reader.read_line(&mut buf) {
                Ok(0) => return None,
                Err(e) => return Some(Err(e)),
                Ok(_) => {
                    // `read_line` keeps the line terminator; drop the '\n' so
                    // that patterns anchored with `$` can match.
                    if buf.ends_with("\n") {
                        buf.pop();
                    }
                    if let Some(cap) = self.r.captures(&buf) {
                        let groups = cap
                            .iter()
                            .map(|s| s.unwrap_or("").to_string())
                            .collect();
                        return Some(Ok(groups));
                    }
                }
            }
        }
    }
}
pub fn match_lines_stdin(r: Regex) -> MatchLines<Stdin> {
MatchLines {
reader: BufReader::new(stdin()),
r: r,
}
}
/// Builds a `MatchLines` iterator over `reader`, which is wrapped in a
/// `BufReader`, matching every line against `r`.
pub fn match_lines<R: Read>(reader: R, r: Regex) -> MatchLines<R> {
    let buffered = BufReader::new(reader);
    MatchLines {
        reader: buffered,
        r: r,
    }
}
/// Conversion of a value into a text block that starts with a
/// `# name:` / `# type: matrix` / `# rows:` / `# columns:` header, as
/// produced by the implementations in this file.
pub trait OctaveString {
/// Returns the text representation of `self`, labelled with `name` in the
/// `# name:` header line.
fn to_octave(&self, name: &str) -> String;
}
impl<T: fmt::Display + Clone> OctaveString for Matrix<T> {
    /// Emits the header (name, type `matrix`, row and column counts)
    /// followed by the matrix rendered with `to_csv(" ")`, i.e. one
    /// space-separated line per row.
    fn to_octave(&self, name: &str) -> String {
        let row_count = self.rows();
        let col_count = self.cols();
        let body = self.to_csv(" ");
        format!(
            "# name: {}\n# type: matrix\n# rows: {}\n# columns: {}\n{}",
            name, row_count, col_count, body
        )
    }
}
impl<T: fmt::Display> OctaveString for Vec<T> {
    /// Emits the vector as a matrix with `len()` rows and a single column:
    /// the header, then one value per line, then a final newline.
    fn to_octave(&self, name: &str) -> String {
        let row_count = self.len();
        // One element per line makes the vector a column.
        let body = self.to_csv("\n");
        format!(
            "# name: {}\n# type: matrix\n# rows: {}\n# columns: {}\n{}\n",
            name, row_count, 1, body
        )
    }
}
/// Line-oriented reader for delimiter-separated text.
///
/// Iterating yields one `Vec<String>` per non-empty line. Everything from
/// the first `#` on a line is treated as a comment and discarded, and each
/// field is trimmed of surrounding whitespace.
pub struct CsvReader<R: Read> {
    /// Buffered source of the text.
    reader: BufReader<R>,
    /// Separator the lines are split on.
    delim: String,
}

impl<R: Read> CsvReader<R> {
    /// Consumes the reader and returns it with `delim` as the separator.
    pub fn delimiter(mut self, delim: &str) -> CsvReader<R> {
        self.delim = delim.to_string();
        self
    }
}

impl<R: Read> Iterator for CsvReader<R> {
    type Item = stdio::Result<Vec<String>>;

    /// Returns the fields of the next line that still has content after
    /// comment removal, `Some(Err(_))` if reading fails, or `None` at end
    /// of input.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut line = String::new();
            let bytes_read = match self.reader.read_line(&mut line) {
                Ok(count) => count,
                Err(err) => return Some(Err(err)),
            };
            if bytes_read == 0 {
                return None;
            }

            // A '#' anywhere on the line starts a comment.
            if let Some(comment_start) = line.find('#') {
                line.truncate(comment_start);
            }

            // Blank and comment-only lines are skipped.
            if line.trim().is_empty() {
                continue;
            }

            let fields = line
                .split(&self.delim[..])
                .map(|field| field.trim().to_string())
                .collect::<Vec<String>>();
            return Some(Ok(fields));
        }
    }
}
/// Wraps `reader` in a buffered `CsvReader` that splits lines on `","`.
pub fn csv_reader<R: Read>(reader: R) -> CsvReader<R> {
    let buffered = BufReader::new(reader);
    let default_delim = ",".to_string();
    CsvReader {
        reader: buffered,
        delim: default_delim,
    }
}
/// Construction of a value from the rows produced by a `CsvReader`.
pub trait FromCsv: Sized {
/// Consumes `reader` and builds `Self` from its fields.
///
/// The implementations in this file return `Err` with a message naming the
/// offending text when a field cannot be parsed into the element type.
fn from_csv<R: Read>(reader: CsvReader<R>) -> Result<Self, String>;
}
impl<T: FromStr + Clone> FromCsv for Matrix<T> {
    /// Builds a matrix with one row per line produced by `reader`.
    ///
    /// # Errors
    ///
    /// Returns `Err` when a line cannot be read from the underlying source
    /// (previously this panicked) or when a field cannot be parsed as `T`.
    fn from_csv<R: Read>(reader: CsvReader<R>) -> Result<Matrix<T>, String> {
        let mut m = Matrix::new();
        for row in reader {
            // Report read failures through the return value, not a panic.
            let fields = match row {
                Ok(fields) => fields,
                Err(e) => return Err(format!("Could not read the data: {}", e)),
            };
            let mut v = Vec::with_capacity(fields.len());
            for field in fields {
                match field.parse::<T>() {
                    Ok(val) => v.push(val),
                    Err(_) => {
                        return Err(format!("Could not parse the value: {}", field));
                    }
                }
            }
            // NOTE(review): rows of differing length are passed through as-is;
            // how `add_row` treats them is not visible here -- confirm.
            m.add_row(&v);
        }
        Ok(m)
    }
}
impl<T: FromStr + Clone> FromCsv for Vec<T> {
    /// Flattens every field of every line produced by `reader` into a single
    /// vector, in reading order.
    ///
    /// # Errors
    ///
    /// Returns `Err` when a line cannot be read from the underlying source
    /// (previously this panicked) or when a field cannot be parsed as `T`.
    fn from_csv<R: Read>(reader: CsvReader<R>) -> Result<Vec<T>, String> {
        let mut values = Vec::new();
        for row in reader {
            // Report read failures through the return value, not a panic.
            let fields = match row {
                Ok(fields) => fields,
                Err(e) => return Err(format!("Could not read the data: {}", e)),
            };
            for field in fields {
                match field.parse::<T>() {
                    Ok(val) => values.push(val),
                    Err(_) => {
                        return Err(format!("Could not parse the value: {}", field));
                    }
                }
            }
        }
        Ok(values)
    }
}
#[cfg(test)]
mod tests {
    extern crate regex;
    use super::*;
    use std::io::Read;
    use self::regex::Regex;
    use std::fs::File;
    use std::io::BufReader;
    use matrix::Matrix;

    // Decompresses a fixture and also reads it back through the `Read` impl.
    #[test]
    fn test_read_gzip() {
        assert_eq!(
            String::from_utf8(
                GzipData::from_file("datasets/testing/hello_world.gz").unwrap().into_bytes()).unwrap(),
            "hello world".to_string()
        );
        assert_eq!(GzipData::from_file("datasets/testing/hello_world.gz").unwrap().len(), 11);
        assert!(GzipData::from_file("datasets/testing/random.data").is_err());
        let mut data = GzipData::from_file("datasets/testing/hello_world.gz").unwrap();
        let mut v: Vec<u8> = Vec::new();
        assert!(data.read_to_end(&mut v).is_ok());
        assert_eq!(String::from_utf8(v).unwrap(), "hello world".to_string());
    }

    // Only lines matching the pattern are yielded; group 1 holds the number.
    #[test]
    fn test_match_lines() {
        let f = File::open("datasets/testing/lines.txt").unwrap();
        let r = BufReader::new(f);
        let mut v = Vec::new();
        for line in match_lines(r, Regex::new(r"^[a-z]+ (\d+)$").unwrap()) {
            let captures = line.unwrap();
            v.push(captures[1].parse::<usize>().unwrap());
        }
        assert_eq!(v, vec![1, 2, 3]);
    }

    #[test]
    fn test_matrix_to_octave_string() {
        let m = mat![1, 2, 3; 4, 5, 6];
        let s = m.to_octave("mymatrix");
        assert_eq!(s,
            "# name: mymatrix\n# type: matrix\n# rows: 2\n# columns: 3\n1 2 3\n4 5 6\n"
        );
    }

    #[test]
    fn test_vec_to_octave_string() {
        let m = vec![1,2,3,4];
        let s = m.to_octave("myvec");
        assert_eq!(s,
            "# name: myvec\n# type: matrix\n# rows: 4\n# columns: 1\n1\n2\n3\n4\n"
        );
    }

    #[test]
    fn test_vec_to_csv() {
        let v = vec![1, 2, 3, 4];
        assert_eq!(v.to_csv(","), "1,2,3,4");
        let s = [1, 2, 3];
        assert_eq!(s.to_csv(","), "1,2,3");
        let a = [1];
        assert_eq!(a.to_csv(","), "1");
        let b = [1,2];
        assert_eq!(b.to_csv(","), "1,2");
        let c = Vec::<usize>::new();
        assert_eq!(c.to_csv(","), "");
    }

    #[test]
    fn test_mat_to_csv() {
        let m = mat![1, 2, 3; 4, 5, 6];
        assert_eq!(m.to_csv(","), "1,2,3\n4,5,6\n");
    }

    #[test]
    fn test_csv_reader() {
        let f = File::open("datasets/testing/csv.txt").unwrap();
        let r = csv_reader(f);
        let v = r.map(|x| x.unwrap()).collect::<Vec<Vec<String>>>();
        assert_eq!(v, vec![
            vec!["1","2","3","4"],
            vec!["5","6","7","8"],
            vec!["9","10","11","12"]
        ]);
    }

    // In-memory check of comment stripping, blank-line skipping, field
    // trimming and a non-default delimiter; needs no fixture file.
    #[test]
    fn test_csv_reader_comments_and_delimiter() {
        let data = "# header\n1; 2 ;3 # trailing\n\n4;5;6\n";
        let r = csv_reader(data.as_bytes()).delimiter(";");
        let v = r.map(|x| x.unwrap()).collect::<Vec<Vec<String>>>();
        assert_eq!(v, vec![
            vec!["1","2","3"],
            vec!["4","5","6"]
        ]);
    }

    #[test]
    fn test_csv_reader_matrix() {
        let f = File::open("datasets/testing/csv.txt").unwrap();
        let r = csv_reader(f);
        let m = Matrix::<usize>::from_csv(r).unwrap();
        assert_eq!(m, mat![1,2,3,4; 5,6,7,8; 9,10,11,12]);
    }

    #[test]
    fn test_csv_reader_vec() {
        let f = File::open("datasets/testing/csv.txt").unwrap();
        let r = csv_reader(f);
        let v = Vec::<usize>::from_csv(r).unwrap();
        assert_eq!(v, vec![1,2,3,4,5,6,7,8,9,10,11,12]);
    }
}