/*!

`csvsc` is a framework for building csv file processors.

Imagine you have N csv files with the same structure and you want to use them
to make other M csv files whose information depends in some way on the original
files. This is what `csvsc` was built for. With this tool you can build a
processing chain (_row stream_) that will take each of the input files and
generate new output files with the modifications.

# Quickstart

Start a new binary project with cargo:

```text
$ cargo new --bin miprocesadordecsv
```

Add `csvsc` and `encoding` as dependencies in `Cargo.toml`.

```toml
[dependencies]
csvsc = "2.2"
encoding = "0.2"
```

Now start building your processing chain. Specify the inputs (one or more csv
files), the transformations, and the output.

```
use csvsc::prelude::*;

let mut chain = InputStreamBuilder::from_paths(&[
        // Put here the path to your source files, from 1 to a million
        "test/assets/chicken_north.csv",
        "test/assets/chicken_south.csv",
    ]).unwrap().build().unwrap()

    // Here is where you do the magic: add columns, remove ones, filter
    // the rows, group and aggregate, even probably transpose the data
    // to fit your needs.

    // Specify some (zero, one or many) output targets so that results of
    // your computations get stored somewhere.
    .flush(Target::path("data/output.csv")).unwrap()

    .into_iter();

// And finally consume the stream, reporting any errors to stderr.
while let Some(item) = chain.next() {
    if let Err(e) = item {
        eprintln!("{}", e);
    }
}
```

## Example

Grab your input files, in this case I'll use these two:

**chicken_north.csv**

```csv
month,eggs per week
1,3
1,NaN
1,6
2,
2,4
2,8
3,5
3,1
3,8
```

**chicken_south.csv**

```csv
month,eggs per week
1,2
1,NaN
1,
2,7
2,8
2,23
3,3
3,2
3,12
```

Now build your processing chain.

```rust
// main.rs
use csvsc::prelude::*;

use encoding::all::UTF_8;

let mut chain = InputStreamBuilder::from_paths(vec![
        "test/assets/chicken_north.csv",
        "test/assets/chicken_south.csv",
    ]).unwrap()

    // optionally specify the encoding
    .with_encoding(UTF_8)

    // optionally add a column with the path of the source file as specified
    // in the builder
    .with_source_col("_source")

    // build the row stream
    .build().unwrap()

    // Filter out the rows whose `eggs per week` value is empty or NaN
    .filter_col("eggs per week", |value| {
        value.len() > 0 && value != "NaN"
    }).unwrap()

    // add a column with a value obtained from the filename ¡wow!
    .add(
        Column::with_name("region")
            .from_column("_source")
            .with_regex("_([a-z]+).csv").unwrap()
            .definition("$1")
    ).unwrap()

    // group by two columns, compute some aggregates
    .group(["region", "month"], |row_stream| {
        row_stream.reduce(vec![
            Reducer::with_name("region").of_column("region").last("").unwrap(),
            Reducer::with_name("month").of_column("month").last("").unwrap(),
            Reducer::with_name("avg").of_column("eggs per week").average().unwrap(),
            Reducer::with_name("sum").of_column("eggs per week").sum(0.0).unwrap(),
        ]).unwrap()
    })

    // Write a report to a single file that will contain all the data
    .flush(
        Target::path("data/report.csv")
    ).unwrap()

    // This column will allow us to output to multiple files, in this case
    // a report by month
    .add(
        Column::with_name("monthly report")
            .from_all_previous()
            .definition("data/monthly/{month}.csv")
    ).unwrap()

    .del(vec!["month"])

    // Write every row to a file specified by its `monthly report` column added
    // previously
    .flush(
        Target::from_column("monthly report")
    ).unwrap()

    // Pack the processing chain into an iterator that can be consumed.
    .into_iter();

// Consuming the iterator actually triggers all the transformations.
while let Some(item) = chain.next() {
    item.unwrap();
}
```

This is what comes as output:

**data/monthly/1.csv**

```csv
region,avg,sum
south,2,2
north,4.5,9
```

**data/monthly/2.csv**

```csv
region,avg,sum
north,6,12
south,12.666666666666666,38
```

**data/monthly/3.csv**

```csv
region,avg,sum
north,4.666666666666667,14
south,5.666666666666667,17
```

**data/report.csv**

```csv
region,month,avg,sum
north,2,6,12
south,1,2,2
south,2,12.666666666666666,38
north,3,4.666666666666667,14
south,3,5.666666666666667,17
north,1,4.5,9
```

## Dig deeper

Check [`InputStreamBuilder`](input::InputStreamBuilder) to see more options for
starting a processing chain and reading your input.

Go to the [`RowStream`] documentation to see all the transformations available
as well as options to flush the data to files or standard I/O.
*/

pub mod add;
mod add_with;
mod group;
mod adjacent_group;
mod del;
pub mod error;
mod flush;
mod headers;
pub mod input;
mod inspect;
mod mock;
mod reduce;
mod rename;
mod row_stream;
mod map;
mod filter;
mod select;
pub mod prelude;

pub use crate::input::InputStream;
pub use crate::add::Column;
pub use crate::flush::Target;
pub use crate::row_stream::RowStream;
pub use crate::headers::Headers;
pub use crate::reduce::reducer::Reducer;
pub use crate::reduce::aggregate::Aggregate;
pub use crate::group::GroupCriteria;
pub use crate::error::{Error, RowResult};
pub use crate::mock::MockStream;

/// A row of data.
///
/// This is a type alias of [`csv::StringRecord`].
pub type Row = csv::StringRecord;

/// Turns a [`Headers`] value into a plain [`Row`], consuming it. The
/// conversion is delegated to the `into_row` method of `Headers`.
impl From<Headers> for Row {
    fn from(value: Headers) -> Self {
        value.into_row()
    }
}

#[cfg(test)]
mod tests {
    use std::f64;

    use crate::prelude::*;

    /// Builds a stream from two input paths and checks the first row that
    /// comes out of it.
    #[test]
    fn test_from_paths_api() {
        let builder = InputStreamBuilder::from_paths(&[
            "test/assets/1.csv",
            "test/assets/2.csv",
        ]).unwrap();
        let mut rows = builder.build().unwrap().into_iter();

        let first_row = rows.next().unwrap().unwrap();

        assert_eq!(first_row, Row::from(vec!["1", "3"]));
    }

    /// Exercises the different ways of adding a column to a stream. The chain
    /// is only assembled here, its iterator is never advanced.
    #[test]
    fn test_add_api() {
        let source = InputStreamBuilder::from_paths(&["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        // A column whose value is built from the values of other columns.
        let with_target = source
            .add(
                Column::with_name("_target")
                    .from_all_previous()
                    .definition("data/add/output/{a}.csv")
            )
            .unwrap();

        // A column whose value is built from parts of a previous column,
        // extracted with a regular expression.
        let with_regex_col = with_target
            .add(
                Column::with_name("new_col")
                    .from_column("old_column")
                    .with_regex("regex").unwrap()
                    .definition("a definition")
            )
            .unwrap();

        // A column computed arbitrarily: the closure receives the headers of
        // the whole stream and the current row.
        let with_closure_col = with_regex_col
            .add_with("new_col2", |_headers, _row| {
                Ok("new_value".into())
            })
            .unwrap();

        with_closure_col.into_iter();
    }

    /// Exercises the reducers offered by `Reducer` together with a custom
    /// `Aggregate` implementation. The chain is only assembled here, its
    /// iterator is never advanced.
    #[test]
    fn test_reduce_api() {
        // Minimal hand-written aggregate handed to `Reducer::custom` below.
        #[derive(Debug)]
        struct Foo {
            colname: String,
        }

        impl Aggregate for Foo {
            fn update(&mut self, _headers: &Headers, _row: &Row) -> crate::error::Result<()> {
                unimplemented!()
            }

            fn colname(&self) -> &str {
                self.colname.as_str()
            }

            fn value(&self) -> String {
                String::from("-")
            }
        }

        let source = InputStreamBuilder::from_paths(vec!["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        let reduced = source
            .reduce(vec![
                Reducer::with_name("rows").count(),
                Reducer::with_name("avg").of_column("col").average().unwrap(),
                Reducer::with_name("last").of_column("col").last("-").unwrap(),
                Reducer::with_name("max").of_column("col").max(f64::NEG_INFINITY).unwrap(),
                Reducer::with_name("min").of_column("col").min(f64::INFINITY).unwrap(),
                Reducer::with_name("sum").of_column("col").sum(0.0).unwrap(),
                Reducer::with_name("mul").of_column("col").product(1.0).unwrap(),
                Reducer::with_name("closure").of_column("col").with_closure(|acc, cur| {
                    Ok(acc * cur.parse::<i32>().unwrap())
                }, 1).unwrap(),
                Reducer::custom(Foo { colname: "custom".to_string() }),
            ])
            .unwrap();

        reduced.into_iter();
    }

    /// Exercises the two filtering entry points, `filter_col` and
    /// `filter_row`. The chain is only assembled here, its iterator is never
    /// advanced.
    #[test]
    fn test_filter_api() {
        InputStreamBuilder::from_paths(vec!["test/assets/1.csv"]).unwrap().build().unwrap()
            // filter entire rows out depending on one column's value and a
            // condition, leaving errored rows untouched. The predicate is
            // true for values that are neither empty nor the literal "NaN".
            .filter_col("b", |value| {
                !value.is_empty() && value != "NaN"
            }).unwrap()

            // filter arbitrarily. You have access to the entire row
            .filter_row(|headers, row| {
                headers.get_field(row, "column").unwrap().is_empty()
            })
            .into_iter();
    }

    /// Exercises `adjacent_group` and `group` with the two kinds of criteria
    /// used in this file: a list of column names and a closure. The chain is
    /// only assembled here, its iterator is never advanced.
    #[test]
    fn test_group_api() {
        let source = InputStreamBuilder::from_paths(vec!["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        // Criteria given as column names; each group's stream is returned
        // unchanged.
        let adjacent = source.adjacent_group(["a", "b"], |stream| stream);

        // Criteria given as a closure, here returning the value of column `b`.
        let grouped = adjacent.group(
            |headers: &Headers, row: &Row| headers.get_field(row, "b").unwrap().to_string(),
            |stream| stream,
        );

        grouped.into_iter();
    }

    /// Exercises `review`, which hands every row result to a closure. The
    /// chain is only assembled here, its iterator is never advanced.
    #[test]
    fn test_inspect_api() {
        let source = InputStreamBuilder::from_paths(vec!["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        // Nothing special happens to the rows; this hook exists to make
        // debugging easier, e.g. by printing the value of a column.
        let reviewed = source.review(|headers, rs| {
            if let Ok(row) = rs {
                println!("Name: {:?}", headers.get_field(row, "name"));
            }
        });

        reviewed.into_iter();
    }

    /// Exercises renaming a column. The chain is only assembled here, its
    /// iterator is never advanced.
    #[test]
    fn test_rename_api() {
        let source = InputStreamBuilder::from_paths(vec!["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        let renamed = source.rename("old_name", "new_name");

        renamed.into_iter();
    }

    /// Exercises `map_col` and `map_row`. The chain is only assembled here,
    /// its iterator is never advanced.
    #[test]
    fn test_map_api() {
        let source = InputStreamBuilder::from_paths(vec!["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        // Every value of the named column goes through the closure and is
        // replaced with what the closure returns.
        let col_mapped = source.map_col("colname", |value| Ok(value.into()));

        // The first closure receives the headers and a row and returns an
        // iterator of row results; the second one receives the headers.
        let row_mapped = col_mapped.map_row(
            |_headers, row| Ok(vec![Ok(row.clone())].into_iter()),
            |headers| headers.clone(),
        );

        row_mapped.into_iter();
    }

    /// Exercises `del` and `select`, both of which take column names. The
    /// chain is only assembled here, its iterator is never advanced.
    #[test]
    fn test_del_api() {
        let source = InputStreamBuilder::from_paths(vec!["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        // Delete columns by name.
        let without_b = source.del(vec!["b"]);
        let only_a = without_b.select(vec!["a"]);

        only_a.into_iter();
    }

    /// Exercises every flush target used in this crate root (a path taken
    /// from a column, a fixed path, stdout and stderr) and checks the rows
    /// that reach the end of the chain.
    #[test]
    fn test_flush_api() {
        let source = InputStreamBuilder::from_paths(vec!["test/assets/1.csv"])
            .unwrap()
            .build()
            .unwrap();

        let with_target = source
            .add(Column::with_name("_target").from_all_previous().definition("data/{a}.csv"))
            .unwrap();

        // A flush target is an output of the processing chain; it can be a
        // single file, multiple files or the standard output. This one takes
        // the destination from the `_target` column.
        let to_column_target = with_target
            .flush(Target::from_column("_target"))
            .unwrap();

        let selected = to_column_target.select(vec!["a", "b"]);

        let to_path = selected.flush(Target::path("data/a_path")).unwrap();
        let to_stdout = to_path.flush(Target::stdout()).unwrap();
        let to_stderr = to_stdout.flush(Target::stderr()).unwrap();

        let mut chain = to_stderr.into_iter();

        let first = chain.next().unwrap().unwrap();
        assert_eq!(first, Row::from(vec!["1", "3"]));

        let second = chain.next().unwrap().unwrap();
        assert_eq!(second, Row::from(vec!["5", "2"]));

        assert!(chain.next().is_none());
    }

    /*
    #[test]
    fn test_report_api() {
        assert!(false, "make a report that can be flushed to a file or to stdout");
        assert!(false, "this report should put every different error in a row and have a count in a different column");
    }
    */
}