use std::collections::{hash_map::Entry, HashMap};

use serde::ser::{Serialize, SerializeStruct, Serializer};

use crate::config::{Config, Delimiter};
use crate::moonblade::Program;
use crate::select::SelectColumns;
use crate::util;
use crate::CliResult;

// Usage text handed to `util::get_args` in `run` to parse the command line
// into `Args`. The command description at the top is still a placeholder.
static USAGE: &str = "
TODO...

Usage:
    xan cluster <column> [options] [<input>]
    xan cluster --help

cluster options:
    -k, --key <expr>  An expression to evaluate to generate a key
                      for each row by transforming the selected cell.

Common options:
    -h, --help               Display this message
    -n, --no-headers         When set, the first row will not be evaled
                             as headers.
    -d, --delimiter <arg>    The field delimiter for reading CSV data.
                             Must be a single character.
";

/// Command-line arguments of the `cluster` command, produced from `USAGE`
/// by `util::get_args`.
#[derive(Deserialize)]
struct Args {
    /// Selection of the column whose cells are clustered (`<column>`).
    arg_column: SelectColumns,
    /// Optional input handed to `Config::new` (`<input>`).
    arg_input: Option<String>,
    /// Optional expression appended to `col(<index>) | ...` to build the
    /// key program (`--key`).
    flag_key: Option<String>,
    /// Forwarded to `Config::no_headers` (`--no-headers`).
    flag_no_headers: bool,
    /// Forwarded to `Config::delimiter` (`--delimiter`).
    flag_delimiter: Option<Delimiter>,
}

/// Entry point of the `cluster` command.
///
/// Reads the CSV input, generates a key for every row from the selected
/// column (optionally piped through the `--key` expression), groups the rows
/// through a [`ClusteringAlgorithm`] and prints the resulting clusters as
/// TOML on stdout.
///
/// Clusters are printed by decreasing number of distinct values, then by
/// decreasing number of rows, then by increasing `best()` value.
///
/// # Errors
///
/// Propagates any error raised while parsing the arguments, opening or
/// reading the input, resolving the column selection, or parsing and
/// evaluating the key program.
///
/// # Panics
///
/// Panics if the clusters cannot be serialized to TOML.
pub fn run(argv: &[&str]) -> CliResult<()> {
    let args: Args = util::get_args(USAGE, argv)?;

    let rconf = Config::new(&args.arg_input)
        .delimiter(args.flag_delimiter)
        .no_headers(args.flag_no_headers)
        .select(args.arg_column);

    let mut rdr = rconf.reader()?;
    let headers = rdr.byte_headers()?;

    let sel_index = rconf.single_selection(headers)?;

    // The key program always starts from the selected cell; a user-provided
    // expression is piped after it.
    let key_expr = match &args.flag_key {
        Some(expr) => format!("col({}) | {}", sel_index, expr),
        None => format!("col({})", sel_index),
    };

    let program = Program::parse(&key_expr, headers)?;
    let mut clustering: Box<dyn ClusteringAlgorithm> = Box::<KeyCollision>::default();

    let mut record = csv::ByteRecord::new();
    let mut index: usize = 0;

    while rdr.read_byte_record(&mut record)? {
        // Cells are raw bytes coming from user data and may not be valid
        // UTF-8: decode lossily instead of panicking on such input.
        let value = String::from_utf8_lossy(&record[sel_index]).into_owned();
        let key = program.generate_key(index, &record)?;

        clustering.process(index, key, value);

        index += 1;
    }

    let mut clusters = clustering.into_clusters();

    // Largest clusters first; `best()` makes the order total between
    // clusters of identical sizes.
    clusters.sort_by(|a, b| {
        b.values
            .len()
            .cmp(&a.values.len())
            .then_with(|| b.rows.len().cmp(&a.rows.len()))
            .then_with(|| a.best().cmp(b.best()))
    });

    let toml_output = TOMLClusters { cluster: clusters };

    // NOTE(review): a serialization failure still panics here; propagate it
    // once a conversion from the TOML error into the CLI error is confirmed.
    println!("{}", toml::to_string_pretty(&toml_output).unwrap());

    Ok(())
}

/// A group of rows gathered under the same key.
#[derive(Debug)]
struct Cluster {
    /// Identifier given to the cluster when it is built.
    id: usize,
    /// Key under which the rows were grouped.
    key: String,
    /// Indices of the rows belonging to the cluster.
    rows: Vec<usize>,
    /// Distinct cell values paired with their number of occurrences.
    /// `best` returns the value of the first pair.
    values: Vec<(String, usize)>,
}

/// Serializes a cluster as an eight-field struct, in this order: `id`, `key`,
/// `nb_values`, `nb_rows`, `rows`, `replace_with`, `values`, `harmonize`.
impl Serialize for Cluster {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Row indices are emitted as a single comma-separated string rather
        // than as a sequence.
        let joined_rows = self
            .rows
            .iter()
            .map(ToString::to_string)
            .collect::<Vec<String>>()
            .join(",");

        let value_count = self.values.len();
        let row_count = self.rows.len();

        let mut fields = serializer.serialize_struct("Cluster", 8)?;

        fields.serialize_field("id", &self.id)?;
        fields.serialize_field("key", &self.key)?;
        fields.serialize_field("nb_values", &value_count)?;
        fields.serialize_field("nb_rows", &row_count)?;
        fields.serialize_field("rows", &joined_rows)?;
        // The suggested replacement is whatever `best` returns.
        fields.serialize_field("replace_with", self.best())?;
        fields.serialize_field("values", &self.values)?;
        // Always emitted as `false`.
        fields.serialize_field("harmonize", &false)?;

        fields.end()
    }
}

impl Cluster {
    /// Builds a cluster from the `(row index, cell value)` pairs grouped
    /// under `key`.
    ///
    /// Row indices are kept in the order they are given. Distinct values are
    /// counted, then ordered by decreasing count, ties being broken by
    /// decreasing value.
    fn from_entries(id: usize, key: String, entries: Vec<(usize, String)>) -> Self {
        let mut rows = Vec::new();
        let mut counts: HashMap<String, usize> = HashMap::new();

        for (row_index, row_value) in entries {
            rows.push(row_index);
            *counts.entry(row_value).or_insert(0) += 1;
        }

        let mut values: Vec<(String, usize)> = counts.into_iter().collect();

        values.sort_by(|(left_value, left_count), (right_value, right_count)| {
            right_count
                .cmp(left_count)
                .then_with(|| right_value.cmp(left_value))
        });

        Cluster {
            id,
            key,
            rows,
            values,
        }
    }

    /// Returns the value of the first entry of `values`, which is the most
    /// frequent one when the cluster was built by `from_entries`.
    ///
    /// # Panics
    ///
    /// Panics if `values` is empty.
    fn best(&self) -> &String {
        let (value, _count) = &self.values[0];
        value
    }
}

/// Interface of a clustering strategy: rows are fed one at a time through
/// `process`, then the accumulated state is consumed by `into_clusters`.
trait ClusteringAlgorithm {
    /// Registers one row, given its index, its generated key and its
    /// original cell value.
    fn process(&mut self, index: usize, key: String, value: String);
    /// Consumes the boxed algorithm and returns the clusters it built.
    fn into_clusters(self: Box<Self>) -> Vec<Cluster>;
}

/// Clustering strategy grouping together the rows sharing an identical key.
#[derive(Default)]
struct KeyCollision {
    /// Maps each key to the `(row index, value)` pairs processed for it.
    collisions: HashMap<String, Vec<(usize, String)>>,
}

impl ClusteringAlgorithm for KeyCollision {
    /// Files `(index, value)` under `key`, creating the bucket the first
    /// time the key is seen.
    fn process(&mut self, index: usize, key: String, value: String) {
        let bucket = match self.collisions.entry(key) {
            Entry::Occupied(slot) => slot.into_mut(),
            Entry::Vacant(slot) => slot.insert(Vec::new()),
        };

        bucket.push((index, value));
    }

    /// Turns every bucket into a `Cluster` and keeps only those holding more
    /// than one distinct value.
    ///
    /// Ids are the enumeration index of each bucket and are assigned before
    /// the filtering, so the ids of the returned clusters may have gaps.
    fn into_clusters(self: Box<Self>) -> Vec<Cluster> {
        let mut clusters = Vec::new();

        for (id, (key, entries)) in self.collisions.into_iter().enumerate() {
            let cluster = Cluster::from_entries(id, key, entries);

            if cluster.values.len() > 1 {
                clusters.push(cluster);
            }
        }

        clusters
    }
}

/// Top-level document serialized to TOML and printed by `run`.
#[derive(Serialize)]
struct TOMLClusters {
    /// Clusters to output; the field name is the key they are serialized
    /// under.
    cluster: Vec<Cluster>,
}