use regex::bytes::Regex;

use crate::config::{Config, Delimiter};
use crate::util;
use crate::CliResult;

/// Help/usage text of the `input` command; it is handed to `util::get_args`
/// together with the raw arguments to build an `Args` value.
///
/// The short flags quoted in the introductory prose must stay in sync with the
/// option declarations further down (e.g. `-H` for `--skip-headers`).
static USAGE: &str = "
Read unusually formatted CSV data.

This means being able to process CSV data with peculiar quoting rules
using --quote or --no-quoting, or dealing with character escaping with --escape.

This command also makes it possible to process CSV files containing metadata and
headers before the tabular data itself, with -H/--skip-headers, -L/--skip-lines.

This command also recognizes variants of TSV files from bioinformatics out of the
box, either by detecting their extension or through dedicated flags:

    - VCF (\"Variant Call Format\") files:
        extensions: `.vcf`, `.vcf.gz`
        flag: --vcf
        reference: https://en.wikipedia.org/wiki/Variant_Call_Format
    - GTF (\"Gene Transfer Format\") files:
        extensions: `.gtf`, `.gtf.gz`, `.gff2`, `.gff2.gz`
        flag: --gtf
        reference: https://en.wikipedia.org/wiki/Gene_transfer_format
    - GFF (\"General Feature Format\") files:
        extensions: `.gff`, `.gff.gz`, `.gff3`, `.gff3.gz`
        flag: --gff
        reference: https://en.wikipedia.org/wiki/General_feature_format

Usage:
    xan input [options] [<input>]

input options:
    --tabs                        Same as -d '\\t', i.e. use tabulations as delimiter.
    --quote <char>                The quote character to use. [default: \"]
    --escape <char>               The escape character to use. When not specified,
                                  quotes are escaped by doubling them.
    --no-quoting                  Disable quoting completely.
    -L, --skip-lines <n>          Skip the first <n> lines of the file.
    -H, --skip-headers <pattern>  Skip header lines matching the given regex pattern.
    -R, --skip-rows <pattern>     Skip rows matching the given regex pattern.
    --vcf                         Process a VCF file. Shorthand for --tabs -H '^##' and
                                  some processing over the first column name.
    --gtf                         Process a GTF file. Shorthand for --tabs -H '^#!'.
    --gff                         Process a GFF file. Shorthand for --tabs -H '^#[#!]'
                                  and -R '^###$'.

Common options:
    -h, --help             Display this message
    -o, --output <file>    Write output to <file> instead of stdout.
    -d, --delimiter <arg>  The field delimiter for reading CSV data.
                           Must be a single character.
";

/// Arguments of the `input` command, as produced by `util::get_args` from
/// `USAGE`. Field names mirror the positional argument (`arg_*`) and the
/// options (`flag_*`) declared there.
#[derive(Deserialize)]
struct Args {
    // --- Input and output -------------------------------------------------
    /// `<input>`: optional path of the data to read.
    arg_input: Option<String>,
    /// `-o/--output <file>`: optional path to write to.
    flag_output: Option<String>,

    // --- CSV dialect ------------------------------------------------------
    /// `-d/--delimiter <arg>`: field delimiter. Overwritten with a tab by
    /// `resolve` whenever `flag_tabs` ends up set.
    flag_delimiter: Option<Delimiter>,
    /// `--tabs`: use tabulations as delimiter.
    flag_tabs: bool,
    /// `--quote <char>`: quote character (declared with a `"` default).
    flag_quote: Delimiter,
    /// `--escape <char>`: escape character, if any.
    flag_escape: Option<Delimiter>,
    /// `--no-quoting`: disable quoting completely.
    flag_no_quoting: bool,

    // --- Skipping ---------------------------------------------------------
    /// `-L/--skip-lines <n>`: how many leading records to drop.
    flag_skip_lines: Option<usize>,
    /// `-H/--skip-headers <pattern>`: regex describing leading header records.
    flag_skip_headers: Option<String>,
    /// `-R/--skip-rows <pattern>`: regex describing rows to drop.
    flag_skip_rows: Option<String>,

    // --- Bioinformatics presets ------------------------------------------
    /// `--vcf`: VCF preset (also enabled by a `.vcf`/`.vcf.gz` input path).
    flag_vcf: bool,
    /// `--gtf`: GTF preset (also enabled by a `.gtf`/`.gff2` input path,
    /// optionally suffixed with `.gz`).
    flag_gtf: bool,
    /// `--gff`: GFF preset (also enabled by a `.gff`/`.gff3` input path,
    /// optionally suffixed with `.gz`).
    flag_gff: bool,
}

impl Args {
    /// Expands the format presets into the low-level options they stand for.
    ///
    /// First, the input path (when there is one) is inspected and a known
    /// extension turns on the matching preset flag. Then every enabled preset
    /// forces `--tabs` and overwrites the header pattern (and, for GFF, the
    /// row pattern), replacing whatever value those options held before.
    /// Finally `--tabs` is translated into a tab delimiter.
    fn resolve(&mut self) {
        // Step 1: detect presets from the file extension.
        let (looks_like_vcf, looks_like_gtf, looks_like_gff) = match self.arg_input.as_deref() {
            Some(path) => {
                let ends_with_any =
                    |suffixes: &[&str]| suffixes.iter().any(|suffix| path.ends_with(*suffix));

                (
                    ends_with_any(&[".vcf", ".vcf.gz"]),
                    ends_with_any(&[".gtf", ".gtf.gz", ".gff2", ".gff2.gz"]),
                    ends_with_any(&[".gff", ".gff.gz", ".gff3", ".gff3.gz"]),
                )
            }
            None => (false, false, false),
        };

        // Detection can only switch a preset on, never off.
        self.flag_vcf |= looks_like_vcf;
        self.flag_gtf |= looks_like_gtf;
        self.flag_gff |= looks_like_gff;

        // Step 2: apply the presets. Order matters: when several are enabled
        // at once, the last one listed here has the final say on the patterns.
        let presets: [(bool, &str, Option<&str>); 3] = [
            (self.flag_vcf, "^##", None),
            (self.flag_gtf, "^#!", None),
            (self.flag_gff, "^#[#!]", Some("^###$")),
        ];

        for &(enabled, header_pattern, row_pattern) in presets.iter() {
            if !enabled {
                continue;
            }

            self.flag_tabs = true;
            self.flag_skip_headers = Some(header_pattern.to_string());

            if let Some(row_pattern) = row_pattern {
                self.flag_skip_rows = Some(row_pattern.to_string());
            }
        }

        // Step 3: --tabs (explicit or implied by a preset) wins over -d.
        if self.flag_tabs {
            self.flag_delimiter = Some(Delimiter(b'\t'));
        }
    }
}

pub fn run(argv: &[&str]) -> CliResult<()> {
    let mut args: Args = util::get_args(USAGE, argv)?;
    args.resolve();

    if args.flag_skip_headers.is_some() && args.flag_skip_lines.is_some() {
        Err("-L/--skip-lines does not work with -H/--skip-headers!")?;
    }

    let mut rconfig = Config::new(&args.arg_input)
        .delimiter(args.flag_delimiter)
        .no_headers(true)
        .flexible(
            args.flag_skip_headers.is_some()
                || args.flag_skip_lines.is_some()
                || args.flag_skip_rows.is_some(),
        )
        .quote(args.flag_quote.as_byte());

    let skip_headers = args
        .flag_skip_headers
        .as_ref()
        .map(|p| Regex::new(p))
        .transpose()?;

    let skip_rows = args
        .flag_skip_rows
        .as_ref()
        .map(|p| Regex::new(p))
        .transpose()?;

    let wconfig = Config::new(&args.flag_output);

    if let Some(escape) = args.flag_escape {
        rconfig = rconfig.escape(Some(escape.as_byte())).double_quote(false);
    }
    if args.flag_no_quoting {
        rconfig = rconfig.quoting(false);
    }

    let mut wtr = wconfig.writer()?;
    let mut record = csv::ByteRecord::new();

    let mut rdr = rconfig.reader()?;
    let mut headers_have_been_skipped = false;
    let mut i: usize = 0;

    while rdr.read_byte_record(&mut record)? {
        i += 1;

        if let Some(pattern) = &skip_headers {
            if !headers_have_been_skipped {
                if !pattern.is_match(record.as_slice()) {
                    headers_have_been_skipped = true;

                    if args.flag_vcf {
                        record = record
                            .iter()
                            .enumerate()
                            .map(|(i, cell)| {
                                if i == 0 && cell == b"#CHROM" {
                                    b"CHROM"
                                } else {
                                    cell
                                }
                            })
                            .collect();
                    }
                } else {
                    continue;
                }
            }
        } else if let Some(skip_lines) = args.flag_skip_lines {
            if i <= skip_lines {
                continue;
            }
        }

        if let Some(pattern) = &skip_rows {
            if pattern.is_match(record.as_slice()) {
                continue;
            }
        }

        wtr.write_record(&record)?;
    }

    wtr.flush()?;

    Ok(())
}