// csvsc 2.2.1
//
// Build processing chains for CSV files
// Documentation
use std::path::{Path, PathBuf};
use std::fs::{self, File};
use std::collections::HashMap;
use std::io;

use csv::{Writer, WriterBuilder};

use crate::{Headers, Row, error};

/// Helper for building a target for flushing data into.
///
/// The type holds no data of its own; it only namespaces the associated
/// constructor functions `from_column`, `path`, `stdout` and `stderr`.
pub struct Target { }

impl Target {
    /// Write output to a file whose path is specified by the given column.
    ///
    /// This gives the oportunity to create a stream that writes to multiple
    /// files depending on some criteria. It doesn't write the value of the
    /// specified column by default, it is just used to decide the path to the
    /// file(s) for writing but that behaviour is configurable.
    pub fn from_column(name: &str) -> ColumnTarget {
        ColumnTarget::new(name.into())
    }

    /// Write output to the specified path in the filesystem.
    pub fn path(path: &str) -> impl TargetManager {
        PathTarget::new(path.into())
    }

    /// Write output to stdout
    pub fn stdout() -> impl TargetManager {
        StdoutTarget::new()
    }

    /// Write output to stderr
    pub fn stderr() -> impl TargetManager {
        StderrTarget::new()
    }
}

/// A destination that rows can be written to, one at a time.
///
/// Implemented in this file by `ColumnTarget`, `PathTarget`, `StdoutTarget`
/// and `StderrTarget`.
pub trait TargetManager {
    /// Writes a single `row` to the destination.
    ///
    /// `headers` describes the columns of `row`; the implementations in this
    /// file emit it as the first record the first time an output is opened.
    /// Returns an error if the row could not be written.
    fn write_row(&mut self, headers: &Headers, row: &Row) -> error::Result<()>;
}

/// Target that picks the output file of each row from the value of one of
/// the row's columns.
pub struct ColumnTarget {
    /// Name of the column whose value is used as the output file path.
    colname: String,
    /// Whether the path column itself is included in the written output.
    write_target: bool,
    /// Open CSV writers, keyed by the path they write to.
    targets: HashMap<PathBuf, Writer<File>>,
}

impl ColumnTarget {
    /// Creates a target that reads output paths from the column `name`.
    ///
    /// No file is opened yet and the path column is excluded from the output
    /// until `write_target` is called.
    fn new(name: String) -> ColumnTarget {
        let targets = HashMap::new();

        ColumnTarget {
            colname: name,
            write_target: false,
            targets,
        }
    }

    /// Consumes the target and returns it configured to also write the path
    /// column to the output files.
    pub fn write_target(mut self) -> ColumnTarget {
        self.write_target = true;
        self
    }
}

impl TargetManager for ColumnTarget {
    /// Writes `row` to the file named by the row's value in the target column.
    ///
    /// The first time a given path is seen, its parent directories are
    /// created, the file is opened with `Writer::from_path` and the header
    /// record is written. The writer is then kept in `self.targets` so later
    /// rows that resolve to the same path go through the same writer.
    ///
    /// Unless `write_target` is set, the target column is left out of both
    /// the header record and every data record.
    ///
    /// # Errors
    ///
    /// Returns `Error::ColumnNotFound` when `headers.get_field` yields no
    /// value for the target column, and propagates any error raised while
    /// creating directories, opening the file or writing a record.
    fn write_row(&mut self, headers: &Headers, row: &Row) -> error::Result<()> {
        use std::collections::hash_map::Entry;

        let target_path = PathBuf::from(
            headers
                .get_field(row, &self.colname)
                .ok_or_else(|| error::Error::ColumnNotFound(self.colname.clone()))?
        );

        // Borrow the configuration fields individually so they stay usable
        // while `self.targets` is mutably borrowed below; this avoids cloning
        // the column name on every row.
        let colname = self.colname.as_str();
        let write_target = self.write_target;

        // A single map lookup: reuse the open writer or create it in place.
        let writer = match self.targets.entry(target_path) {
            Entry::Occupied(open) => open.into_mut(),
            Entry::Vacant(slot) => {
                if let Some(dirname) = slot.key().parent() {
                    fs::create_dir_all(dirname)?;
                }

                let mut writer = Writer::from_path(slot.key())?;

                // Header record, written once per output file.
                if write_target {
                    writer.write_record(headers.as_row())?;
                } else {
                    writer.write_record(
                        headers
                            .as_row()
                            .iter()
                            .filter(|col| *col != colname)
                    )?;
                }

                // Only register the writer once the header went through, so a
                // failed attempt leaves no entry behind.
                slot.insert(writer)
            }
        };

        if write_target {
            writer.write_record(row)?;
        } else {
            // Pair every value with its header so the target column can be
            // dropped by name.
            writer.write_record(
                headers
                    .iter()
                    .zip(row)
                    .filter(|(header, _val)| *header != colname)
                    .map(|(_header, val)| val)
            )?;
        }

        Ok(())
    }
}

/// Target that writes every row to a single file.
struct PathTarget {
    /// Path of the output file.
    path: PathBuf,
    /// CSV writer for `path`; `None` until the first row is written.
    writer: Option<Writer<File>>,
}

impl PathTarget {
    fn new(path: PathBuf) -> PathTarget {
        PathTarget {
            path,
            writer: None,
        }
    }
}

impl TargetManager for PathTarget {
    /// Writes `row` to the file at `self.path`.
    ///
    /// On the first call the parent directories are created, the file is
    /// opened and `headers` is written as the first record; the writer is
    /// then stored and reused by later calls.
    fn write_row(&mut self, headers: &Headers, row: &Row) -> error::Result<()> {
        let out = match self.writer {
            Some(ref mut open) => open,
            None => {
                if let Some(dir) = self.path.parent() {
                    fs::create_dir_all(dir)?;
                }

                let mut fresh = Writer::from_path(&self.path)?;
                fresh.write_record(headers.as_row())?;

                // The slot is known to be empty here, so this stores `fresh`.
                self.writer.get_or_insert(fresh)
            }
        };

        out.write_record(row)?;

        Ok(())
    }
}

/// Target that writes every row to standard output.
struct StdoutTarget {
    /// CSV writer over stdout; `None` until the first row is written.
    writer: Option<Writer<io::Stdout>>,
}

impl StdoutTarget {
    fn new() -> StdoutTarget {
        StdoutTarget {
            writer: None,
        }
    }
}

impl TargetManager for StdoutTarget {
    /// Writes `row` to standard output.
    ///
    /// On the first call a CSV writer over stdout is built and `headers` is
    /// written as the first record; later calls reuse that writer.
    fn write_row(&mut self, headers: &Headers, row: &Row) -> error::Result<()> {
        let out = match self.writer {
            Some(ref mut open) => open,
            None => {
                let mut fresh = WriterBuilder::new().from_writer(io::stdout());
                fresh.write_record(headers.as_row())?;

                // The slot is known to be empty here, so this stores `fresh`.
                self.writer.get_or_insert(fresh)
            }
        };

        out.write_record(row)?;

        Ok(())
    }
}

/// Target that writes every row to standard error.
struct StderrTarget {
    /// CSV writer over stderr; `None` until the first row is written.
    writer: Option<Writer<io::Stderr>>,
}

impl StderrTarget {
    fn new() -> StderrTarget {
        StderrTarget {
            writer: None,
        }
    }
}

impl TargetManager for StderrTarget {
    /// Writes `row` to standard error.
    ///
    /// On the first call a CSV writer over stderr is built and `headers` is
    /// written as the first record; later calls reuse that writer.
    fn write_row(&mut self, headers: &Headers, row: &Row) -> error::Result<()> {
        let out = match self.writer {
            Some(ref mut open) => open,
            None => {
                let mut fresh = WriterBuilder::new().from_writer(io::stderr());
                fresh.write_record(headers.as_row())?;

                // The slot is known to be empty here, so this stores `fresh`.
                self.writer.get_or_insert(fresh)
            }
        };

        out.write_record(row)?;

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use std::fs;

    use super::{ColumnTarget, TargetManager};

    use crate::{Headers, Row};

    /// By default the path column is left out of the header and data records.
    #[test]
    fn test_column_target() {
        let path = "data/col_target_test.csv";
        let headers: Headers = Row::from(vec!["col", "_target"]).into();
        let row = Row::from(vec!["1", path]);

        let mut target = ColumnTarget::new("_target".into());
        target.write_row(&headers, &row).unwrap();
        drop(target); // Force closing the file before reading it back

        let contents = fs::read_to_string(path).unwrap();

        assert_eq!(contents, "col\n1\n");
    }

    /// With `write_target` enabled the path column is written as well.
    #[test]
    fn test_column_target_write_target() {
        let path = "data/col_target_test_with_target.csv";
        let headers: Headers = Row::from(vec!["col", "_target"]).into();
        let row = Row::from(vec!["1", path]);

        let mut target = ColumnTarget::new("_target".into()).write_target();
        target.write_row(&headers, &row).unwrap();
        drop(target); // Force closing the file before reading it back

        let contents = fs::read_to_string(path).unwrap();

        assert_eq!(contents, format!("col,_target\n1,{}\n", path));
    }
}