// lbl-core 0.1.0
//
// lbl is a toolkit for managing address labels.
// Documentation
use crate::{standardize_collection, CollectionData, LblError};
use polars::prelude::*;
use std::collections::HashMap;
use std::fs::File;
use std::path::{Path, PathBuf};

/// Load a single label file and run it through [`standardize_collection`].
///
/// The raw table is read with [`load_raw_data`]; the result is then handed to
/// [`standardize_collection`] together with the collection metadata.
///
/// # Arguments
///
/// * `path` - location of the label file to read.
/// * `metadata` - metadata to attach to the collection. When `None`, it is
///   derived from the file's location via [`parse_path_metadata`].
///
/// # Errors
///
/// Returns any error produced by [`load_raw_data`] or
/// [`standardize_collection`].
pub fn load_file(path: PathBuf, metadata: Option<&CollectionData>) -> Result<DataFrame, LblError> {
    // Declared up front so that metadata inferred from the path outlives the
    // reference taken to it in the `None` arm.
    let inferred;
    let metadata = match metadata {
        Some(metadata) => metadata,
        None => {
            inferred = parse_path_metadata(&path);
            &inferred
        }
    };
    // `path` is owned and not needed afterwards, so it is moved rather than cloned.
    standardize_collection(load_raw_data(path)?, metadata)
}

/// Derive collection metadata from where a label file sits on disk.
///
/// The name of the file's parent directory is stored as `collection`, and the
/// name of the directory above that is stored as `network`. A field keeps its
/// default value when the corresponding directory does not exist in `path` or
/// has no final name component.
pub fn parse_path_metadata(path: &Path) -> CollectionData {
    /// Final component of `dir` as an owned string (lossy for non-UTF-8 names).
    fn dir_name(dir: &Path) -> Option<String> {
        dir.file_name()
            .map(|name| name.to_string_lossy().into_owned())
    }

    let mut data = CollectionData::default();

    let collection_dir = match path.parent() {
        Some(dir) => dir,
        None => return data,
    };

    if let Some(name) = dir_name(collection_dir) {
        data.collection = Some(name);
    }
    if let Some(name) = collection_dir.parent().and_then(dir_name) {
        data.network = Some(name);
    }

    data
}

/// Containers of paths that can be flattened into a single `Vec<PathBuf>`.
///
/// Lets callers such as [`load_files`] accept differently shaped path
/// collections through one generic parameter.
pub trait FlattenPathBufVec {
    /// Return every path held by `self` as one flat, owned `Vec<PathBuf>`.
    fn flatten_into_pathbuf(&self) -> Vec<PathBuf>;
}

impl FlattenPathBufVec for Vec<PathBuf> {
    /// A plain list is already flat: return an owned copy in the same order.
    fn flatten_into_pathbuf(&self) -> Vec<PathBuf> {
        self.to_vec()
    }
}

impl FlattenPathBufVec for HashMap<String, Vec<PathBuf>> {
    /// Concatenate the path lists of every entry; keys are ignored.
    ///
    /// Paths within one entry keep their order, but the order of entries
    /// follows the map's iteration order and is therefore unspecified.
    fn flatten_into_pathbuf(&self) -> Vec<PathBuf> {
        let mut flat = Vec::new();
        for group in self.values() {
            flat.extend(group.iter().cloned());
        }
        flat
    }
}

impl FlattenPathBufVec for HashMap<String, HashMap<String, Vec<PathBuf>>> {
    /// Concatenate the path lists found under every outer and inner key.
    ///
    /// Paths within one innermost list keep their order, but the order in
    /// which the lists are visited follows the maps' iteration order and is
    /// therefore unspecified.
    fn flatten_into_pathbuf(&self) -> Vec<PathBuf> {
        self.values()
            .flat_map(|inner| inner.values())
            .flatten()
            .cloned()
            .collect()
    }
}

/// load label files
pub fn load_files<T: FlattenPathBufVec>(
    paths: &T,
    metadata: Option<&CollectionData>,
) -> Result<DataFrame, LblError> {
    let paths = paths.flatten_into_pathbuf();
    let dfs: Result<Vec<DataFrame>, LblError> =
        paths.into_iter().map(|p| load_file(p, metadata)).collect();
    let mut dfs = dfs?;

    let mut final_df = dfs.remove(0);
    for df in dfs {
        final_df = final_df.vstack(&df)?;
    }
    Ok(final_df)
}

/// Read a label file into a `DataFrame` without any further processing.
///
/// The reader is chosen from the file extension, compared ASCII
/// case-insensitively: `csv` files are read with `CsvReader` and `parquet`
/// files with `ParquetReader`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if the reader fails to
/// parse it, or if the extension is missing, not valid UTF-8, or not one of
/// the supported types.
pub fn load_raw_data(path: PathBuf) -> Result<DataFrame, LblError> {
    // A missing or non-UTF-8 extension becomes "" and falls through to the
    // unsupported-type error below.
    let extension = path
        .extension()
        .and_then(std::ffi::OsStr::to_str)
        .unwrap_or("");
    // Match on a lowercased copy so that e.g. `labels.CSV` is accepted, while
    // the error message still reports the extension exactly as written.
    match extension.to_ascii_lowercase().as_str() {
        "csv" => {
            let file = File::open(&path)?;
            Ok(CsvReader::new(file).finish()?)
        }
        "parquet" => {
            let file = File::open(&path)?;
            Ok(ParquetReader::new(file).finish()?)
        }
        _ => Err(LblError::LblError(format!(
            "Unsupported file type: {}",
            extension
        ))),
    }
}