use crate::datasources::DataMemmap;
use super::DataSource;
use crate::errors::{PointCloudError, ParsingError};
use flate2::read::GzDecoder;
use indexmap::IndexMap;
use std::ffi::OsStr;
use std::fs::File;
use std::io::Read;
use std::path::PathBuf;
extern crate csv;
use self::csv::Reader;
mod numeric;
use numeric::*;
mod bools;
use bools::*;
mod vector;
use vector::*;
mod strings;
use strings::*;
mod list;
pub use list::MetadataList;
use list::*;
pub mod values;
use values::*;
mod summary;
pub use summary::*;
/// Describes which columns to load as labels/metadata and the value type of each.
///
/// A scheme is built with the `add_*` methods and then used through `open`
/// (or `empty`) to produce a `MetadataList`.
#[derive(Debug, Clone)]
pub struct LabelScheme {
/// CSV header of the column holding row names. The empty string means
/// "no name column"; the CSV reader only looks for names when this is non-empty.
name_column: String,
/// Maps each column key to a template `Value`. The template determines the
/// type reported by `schema_json` and the kind of blank list created for the column.
schema: IndexMap<String, Value>,
}
impl LabelScheme {
/// Creates an empty scheme: no name column and no schema entries.
pub fn new() -> LabelScheme {
    let name_column = String::new();
    let schema = IndexMap::new();
    LabelScheme {
        name_column,
        schema,
    }
}
/// Renders the schema as a single-line, JSON-style object of the form
/// `{"key":"type",...}`, in schema iteration order.
///
/// Keys and type names are interpolated verbatim; no escaping is applied.
#[doc(hidden)]
pub fn schema_json(&self) -> String {
    let mut entries: Vec<String> = Vec::new();
    for (key, template) in self.schema.iter() {
        entries.push(format!("\"{}\":\"{}\"", key, template.value_type()));
    }
    let body = entries.join(",");
    format!("{{{}}}", body)
}
/// Registers `value` as the template for the column named `key`.
///
/// This is the generic form behind the typed `add_*` helpers. Any value
/// previously stored under `key` is discarded.
#[doc(hidden)]
pub fn add_value(&mut self, key: String, value: Value) {
    let _previous = self.schema.insert(key, value);
}
/// Sets the header of the column that holds row names.
///
/// Passing an empty string leaves the scheme without a name column.
pub fn add_name_column(&mut self, name: &str) {
    self.name_column = name.to_owned();
}
/// Declares `key` as a string-valued column.
pub fn add_string(&mut self, key: String) {
    let template = Value::String(String::new());
    self.schema.insert(key, template);
}
/// Declares `key` as a boolean-valued column.
pub fn add_bool(&mut self, key: String) {
    let template = Value::Bool(false);
    self.schema.insert(key, template);
}
/// Declares `key` as a real-valued (`Number::Real`) column.
pub fn add_f32(&mut self, key: String) {
    let template = Value::Number(Number::Real(0.0));
    self.schema.insert(key, template);
}
/// Declares `key` as a natural-number (`Number::Natural`) column.
pub fn add_u32(&mut self, key: String) {
    let template = Value::Number(Number::Natural(0));
    self.schema.insert(key, template);
}
/// Declares `key` as an integer-valued (`Number::Integer`) column.
pub fn add_i32(&mut self, key: String) {
    let template = Value::Number(Number::Integer(0));
    self.schema.insert(key, template);
}
/// Declares `name` as a vector-valued column with `dim` components.
///
/// `dtype` selects the element type of the vector:
/// * `"f32"` or `"Real"` for real components,
/// * `"u32"` or `"Natural"` for natural-number components,
/// * `"i32"` or `"Integer"` for integer components.
///
/// The stored template is a zero-filled vector of length `dim`.
///
/// # Panics
///
/// Panics if `dtype` is not one of the strings listed above.
pub fn add_vector(&mut self, name: String, dim: usize, dtype: &str) {
    let template = match dtype {
        "f32" | "Real" => Vector::Real(vec![0.0; dim]),
        "u32" | "Natural" => Vector::Natural(vec![0; dim]),
        "i32" | "Integer" => Vector::Integer(vec![0; dim]),
        // Keep the historical message as a prefix, but report the offending
        // value and the accepted spellings so the caller can fix the input.
        other => panic!(
            "Don't know what type you provided: {:?} (expected one of \"f32\"/\"Real\", \"u32\"/\"Natural\", \"i32\"/\"Integer\")",
            other
        ),
    };
    self.schema.insert(name, Value::Vector(template));
}
/// Builds a `MetadataList` that has one blank list per schema entry,
/// inserted in schema iteration order, and no rows.
#[doc(hidden)]
pub fn empty(&self) -> MetadataList {
    let mut blank = MetadataList::new();
    self.schema.iter().for_each(|(key, template)| {
        blank.insert(key.clone(), template.blank_list());
    });
    blank
}
/// Loads the metadata file at `path`, choosing the reader from the file extension.
///
/// * `dat` is read as a memory-mapped vector file (`open_memmap`),
/// * `csv` is read as a plain CSV file (`open_csv`),
/// * `gz` is read as a gzip-compressed CSV file (`open_csv`).
///
/// A line describing the path and its extension is printed to stdout first.
///
/// # Panics
///
/// Panics with a descriptive message if the extension is missing, is not
/// valid UTF-8, or is not one of the extensions listed above. The readers
/// this dispatches to have their own panic conditions.
pub fn open(&self, path: &PathBuf) -> Result<MetadataList, PointCloudError> {
    // A path without an extension must not abort here with a bare `unwrap`
    // failure: log an empty extension and let the match below emit the
    // descriptive panic instead.
    println!(
        "Opening {:?} with extension {:?}",
        path,
        path.extension().unwrap_or_default()
    );
    match path.extension().and_then(OsStr::to_str) {
        Some("dat") => self.open_memmap(path),
        Some("csv") | Some("gz") => self.open_csv(path),
        _ => panic!(
            "Please provide either a CSV or a memmaped dat file, not {:?}",
            path
        ),
    }
}
/// Reads a memory-mapped file of fixed-width vectors into RAM and returns
/// it as a `MetadataList` with a single vector column.
///
/// The scheme must contain exactly one entry, and that entry must be a
/// vector; its length is used as the per-row dimension of the file.
///
/// # Panics
///
/// Panics if the schema does not have exactly one entry, if that entry is
/// not a vector, if the memmap cannot be created, or if a row cannot be read.
#[doc(hidden)]
pub fn open_memmap(&self, path: &PathBuf) -> Result<MetadataList, PointCloudError> {
    assert!(self.schema.len() == 1);
    let (name, template) = self.schema.iter().next().unwrap();
    let labels_dim = match template {
        Value::Vector(v) => v.len(),
        _ => panic!("Need a vector only to use a MEMMAP file!"),
    };

    let label = DataMemmap::new(labels_dim, &path).unwrap();

    // Copy every row out of the memmap into one contiguous buffer.
    let mut label_in_ram = Vec::new();
    for row in 0..label.len() {
        let values = label.get(row).unwrap();
        label_in_ram.extend_from_slice(values);
    }

    let mut list = MetadataList::new();
    list.insert(name.clone(), VectorList::from_f32(label_in_ram, labels_dim));
    Ok(list)
}
#[doc(hidden)]
pub fn open_csv(&self, path: &PathBuf) -> Result<MetadataList, PointCloudError> {
if !path.exists() {
panic!("CSV file {:?} does not exist", path);
}
println!("LabelScheme: {:?}", self);
match File::open(&path) {
Ok(file) => {
if path.extension().unwrap() == "gz" {
self.read_csv(Reader::from_reader(GzDecoder::new(file)), path)
} else {
self.read_csv(Reader::from_reader(file), path)
}
}
Err(e) => panic!("Unable to open csv file {:#?}", e),
}
}
/// Parses every record of `rdr` into per-column lists described by the schema.
///
/// The header row is used to locate each schema key (and the name column,
/// when one is set). If several header cells match a key, the last one is
/// used. `path` is only used to fill in the file name of returned errors.
///
/// # Errors
///
/// Returns `ParsingError::CSVReadError` (wrapped in `PointCloudError`) with
/// the file name, line number and column key when a record lacks a required
/// cell or a cell cannot be parsed as the declared type.
///
/// # Panics
///
/// Panics if the header or a record cannot be read, or if the header does
/// not contain a schema key or the configured name column.
fn read_csv<R: Read>(
    &self,
    mut rdr: Reader<R>,
    path: &PathBuf,
) -> Result<MetadataList, PointCloudError> {
    let has_name = !self.name_column.is_empty();
    let mut row_names: IndexMap<usize, String> = IndexMap::new();
    let mut columns_to_read: Vec<(String, usize, ValueList)> = Vec::new();

    // Builds the error shared by every per-record failure path.
    let parse_failure = |line_number: usize, key: &str| {
        PointCloudError::ParsingError(ParsingError::CSVReadError {
            file_name: path.to_string_lossy().to_string(),
            line_number,
            key: key.to_string(),
        })
    };

    // Resolve header positions first; the header borrow ends with this block.
    let name_index: usize = {
        let header = rdr.headers().expect("Can't read header.");
        // Index of the last header cell equal to `wanted`, if any.
        let find_column = |wanted: &str| -> Option<usize> {
            header
                .iter()
                .enumerate()
                .filter(|(_, cell)| *cell == wanted)
                .map(|(index, _)| index)
                .last()
        };

        for (key, template) in self.schema.iter() {
            match find_column(key.as_str()) {
                Some(index) => {
                    columns_to_read.push((key.clone(), index, template.blank_list()));
                }
                None => panic!("CSV has no {} column!", key),
            }
        }

        if has_name {
            match find_column(self.name_column.as_str()) {
                Some(index) => index,
                None => panic!("CSV has no {} column!", self.name_column),
            }
        } else {
            0
        }
    };

    for (row, result) in rdr.records().enumerate() {
        let record = result.expect("Unable to read a record from the label CSV");

        for (key, column, list) in columns_to_read.iter_mut() {
            // A missing cell and an unparsable cell are reported identically.
            let parsed = match record.get(*column) {
                Some(cell) => list.read_csv_val(cell).is_ok(),
                None => false,
            };
            if !parsed {
                return Err(parse_failure(
                    record.position().unwrap().line() as usize,
                    key.as_str(),
                ));
            }
        }

        if has_name {
            match record.get(name_index) {
                Some(cell) => {
                    row_names.insert(row, cell.to_string());
                }
                None => {
                    return Err(parse_failure(
                        record.position().unwrap().line() as usize,
                        self.name_column.as_str(),
                    ));
                }
            }
        }
    }

    // Columns are handed over last-to-first, i.e. in reverse schema order.
    let mut metalist = MetadataList::new();
    for (key, _column, list) in columns_to_read.into_iter().rev() {
        metalist.insert(key, list);
    }
    if has_name {
        metalist.insert_names(row_names);
    }
    Ok(metalist)
}
}