cifar_10_loader 0.2.0

Crate of loading cifar10
Documentation
extern crate itertools;
extern crate regex;
extern crate walkdir;

#[cfg(feature = "rayon")]
extern crate rayon;
///CifarDataset is Top Level Struct of cifar_10_loader.
///
///CifarDataset include labels of Cifar10, train_datas, test_datas and their count.
///
pub struct CifarDataset {
    ///Cifar10 lables.
    ///
    /// ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
    pub labels: Vec<String>,
    /// Count of training datas.
    ///
    /// 50000 datas
    pub train_count: usize,
    /// Count of test datas.
    ///
    /// 10000 datas
    pub test_count: usize,
    /// Dataset for training.
    pub train_dataset: Vec<super::CifarImage>,
    /// Dataset for test.
    pub test_dataset: Vec<super::CifarImage>,
}
///For inner use struct
struct CifarFilePaths {
    meta_data_paths: Vec<::std::path::PathBuf>,
    train_data_paths: Vec<::std::path::PathBuf>,
    test_data_paths: Vec<::std::path::PathBuf>,
}


impl CifarDataset {
    ///Creates a new CifarDataset.
    pub fn new<P: AsRef<::std::path::Path>>(path: P) -> Result<Self, String> {
        use std::thread::spawn;
        let CifarFilePaths {
            meta_data_paths,
            train_data_paths,
            test_data_paths,
        } = CifarDataset::get_file_paths(path)?;
        let meta_data_handle = spawn(move || CifarDataset::get_meta_data(&meta_data_paths));
        let train_images_handle = spawn(move || {
            CifarDataset::get_images(CifarDataset::get_byte_datas(&train_data_paths)?)
        });
        let test_images_handle = spawn(move || {
            CifarDataset::get_images(CifarDataset::get_byte_datas(&test_data_paths)?)
        });
        let labels = CifarDataset::for_cifardataset_join_thread(meta_data_handle)?;
        let train_images = CifarDataset::for_cifardataset_join_thread(train_images_handle)?;
        let test_images = CifarDataset::for_cifardataset_join_thread(test_images_handle)?;
        let cifar_dataset = CifarDataset {
            labels: labels,
            train_count: train_images.len() as usize,
            train_dataset: train_images,
            test_count: test_images.len() as usize,
            test_dataset: test_images,
        };
        Ok(cifar_dataset)
    }
    fn for_cifardataset_join_thread<T>(
        p: ::std::thread::JoinHandle<Result<T, ::std::io::Error>>,
    ) -> Result<T, String> {
        p.join()
            .map(|content| content.map_err(|err| err.to_string()))
            .map_err(|_| "thread panicked".to_string())?
    }
    fn get_file_paths<P: AsRef<::std::path::Path>>(path: P) -> Result<CifarFilePaths, String> {
        use self::regex::Regex;
        let paths = &walkdir::WalkDir::new(path)
            .into_iter()
            .flat_map(|x| x.map(|x| x.path().to_path_buf()))
            .filter(|x| x.is_file())
            .collect::<Vec<::std::path::PathBuf>>();
        let train_data_path_re = Regex::new("data_batch_[1-5].bin").map_err(|err| err.to_string())?;
        let test_data_path_re = Regex::new("test_batch.bin").map_err(|err| err.to_string())?;
        let cifar_file_paths = CifarFilePaths {
            meta_data_paths: CifarDataset::get_meta_data_paths(paths)?,
            train_data_paths: CifarDataset::get_paths_regex(paths, &train_data_path_re)?,
            test_data_paths: CifarDataset::get_paths_regex(paths, &test_data_path_re)?,
        };
        Ok(cifar_file_paths)
    }
    #[cfg(feature = "rayon")]
    fn get_meta_data_paths(
        paths: &[::std::path::PathBuf],
    ) -> Result<Vec<::std::path::PathBuf>, String> {
        use self::rayon::prelude::*;
        use std::path::{Path, PathBuf};
        let meta_data_file_name = Path::new("batches.meta.txt");
        let fpaths: Vec<PathBuf> = paths
            .into_par_iter()
            .filter(|path| {
                path.file_name()
                    .map(|file_name| file_name == meta_data_file_name)
                    .unwrap_or(false)
            })
            .cloned()
            .collect();
        if fpaths.is_empty() {
            Err("Can't Find Meta Data Files!!".to_string())
        } else {
            Ok(fpaths)
        }
    }
    #[cfg(not(feature = "rayon"))]
    fn get_meta_data_paths(
        paths: &[::std::path::PathBuf],
    ) -> Result<Vec<::std::path::PathBuf>, String> {
        use std::path::{Path, PathBuf};
        let meta_data_file_name = Path::new("batches.meta.txt");
        let fpaths: Vec<PathBuf> = paths
            .into_iter()
            .filter(|path| {
                path.file_name()
                    .map(|file_name| file_name == meta_data_file_name)
                    .unwrap_or(false)
            })
            .cloned()
            .collect();
        if fpaths.is_empty() {
            Err("Can't Find Meta Data Files!!".to_string())
        } else {
            Ok(fpaths)
        }
    }
    #[cfg(feature = "rayon")]
    fn get_paths_regex(
        paths: &[::std::path::PathBuf],
        re: &self::regex::Regex,
    ) -> Result<Vec<::std::path::PathBuf>, String> {
        use self::rayon::prelude::*;
        use std::path::PathBuf;
        let fpaths: Vec<PathBuf> = paths
            .par_iter()
            .filter(|path| {
                path.file_name()
                    .map(|file_name| {
                        let file_name = file_name.to_string_lossy();
                        re.is_match(file_name.as_ref())
                    })
                    .unwrap_or(false)
            })
            .cloned()
            .collect();
        if fpaths.is_empty() {
            Err("Can't Find Train Data Files!!".to_string())
        } else {
            Ok(fpaths)
        }
    }
    #[cfg(not(feature = "rayon"))]
    fn get_paths_regex(
        paths: &[::std::path::PathBuf],
        re: &self::regex::Regex,
    ) -> Result<Vec<::std::path::PathBuf>, String> {
        use std::path::PathBuf;
        let fpaths: Vec<PathBuf> = paths
            .iter()
            .filter(|path| {
                path.file_name()
                    .map(|file_name| {
                        let file_name = file_name.to_string_lossy();
                        re.is_match(file_name.as_ref())
                    })
                    .unwrap_or(false)
            })
            .cloned()
            .collect();
        if fpaths.is_empty() {
            Err("Can't Find Train Data Files!!".to_string())
        } else {
            Ok(fpaths)
        }
    }
    fn get_meta_data(paths: &[::std::path::PathBuf]) -> Result<Vec<String>, ::std::io::Error> {
        use std::io::Read;
        paths
            .iter()
            .map(|meta_path| -> Result<String, ::std::io::Error> {
                ::std::fs::File::open(meta_path).and_then(|mut file| {
                    let mut contents = String::new();
                    file.read_to_string(&mut contents).map(|_| contents)
                })
            })
            .map(|lines| -> Result<Vec<String>, ::std::io::Error> {
                lines.map(|l| -> Vec<String> {
                    l.lines()
                        .filter(|x| !x.is_empty())
                        .map(|x| x.into())
                        .collect()
                })
            })
            .collect::<Result<Vec<Vec<String>>, ::std::io::Error>>()
            .map(|v| v.concat())
    }
    #[cfg(feature = "rayon")]
    fn get_byte_datas(paths: &[::std::path::PathBuf]) -> Result<Vec<Vec<u8>>, ::std::io::Error> {
        use std::io::{BufReader, Read};
        use self::rayon::prelude::*;
        paths
            .par_iter()
            .map(|file_path| -> Result<Vec<u8>, ::std::io::Error> {
                ::std::fs::File::open(file_path).and_then(|file| {
                    let mut byte_data: Vec<u8> = Vec::new();
                    BufReader::new(file)
                        .read_to_end(&mut byte_data)
                        .map(|_| byte_data)
                })
            })
            .map(|byte_data| -> Result<Vec<Vec<u8>>, ::std::io::Error> {
                byte_data.map(|b| -> Vec<Vec<u8>> {
                    b.chunks(3073)
                        .map(|byte_img| -> Vec<u8> { byte_img.to_vec() })
                        .collect()
                })
            })
            .collect::<Result<Vec<Vec<Vec<u8>>>, ::std::io::Error>>()
            .map(|v| v.concat())
    }
    #[cfg(not(feature = "rayon"))]
    fn get_byte_datas(paths: &[::std::path::PathBuf]) -> Result<Vec<Vec<u8>>, ::std::io::Error> {
        use std::io::{BufReader, Read};
        paths
            .iter()
            .map(|file_path| -> Result<Vec<u8>, ::std::io::Error> {
                ::std::fs::File::open(file_path).and_then(|file| {
                    let mut byte_data: Vec<u8> = Vec::new();
                    BufReader::new(file)
                        .read_to_end(&mut byte_data)
                        .map(|_| byte_data)
                })
            })
            .map(|byte_data| -> Result<Vec<Vec<u8>>, ::std::io::Error> {
                byte_data.map(|b| -> Vec<Vec<u8>> {
                    b.chunks(3073)
                        .map(|byte_img| -> Vec<u8> { byte_img.to_vec() })
                        .collect()
                })
            })
            .collect::<Result<Vec<Vec<Vec<u8>>>, ::std::io::Error>>()
            .map(|v| v.concat())
    }
    #[cfg(feature = "rayon")]
    fn get_images(byte_datas: Vec<Vec<u8>>) -> Result<Vec<super::CifarImage>, ::std::io::Error> {
        use self::rayon::prelude::*;
        byte_datas
            .into_par_iter()
            .map(|byte_img| {
                use super::CifarImageTrait;
                super::CifarImage::new(&byte_img)
            })
            .collect::<Result<Vec<super::CifarImage>, ::std::io::Error>>()
    }
    #[cfg(not(feature = "rayon"))]
    fn get_images(byte_datas: Vec<Vec<u8>>) -> Result<Vec<super::CifarImage>, ::std::io::Error> {
        byte_datas
            .into_iter()
            .map(|byte_img| {
                use super::CifarImageTrait;
                super::CifarImage::new(&byte_img)
            })
            .collect::<Result<Vec<super::CifarImage>, ::std::io::Error>>()
    }
}