use std::io::BufWriter;
use std::io::Write;
use std::fs::File;
use std::path::Path;
use std::path::PathBuf;
use std::time::Instant;
use clap::{self, Parser, Subcommand, Args};
use rustfastq::utils::get_spinner;
use rustfastq::{phred_counter, io::quality_filter};
// Top-level command-line interface, parsed with clap's derive API.
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into `--help` text, so `///` here would change the
// program's user-visible output.
#[derive(Parser)]
#[clap(author, version, about, long_about = None)]
struct Cli {
// Value of `-o` / `--output`; `main` hands it to the selected subcommand
// as the destination for its results.
#[clap(short ='o', long = "output")]
output: String,
// The subcommand to run; `main` dispatches on it.
#[clap(subcommand)]
command: MyCommand
}
// Subcommands understood by the CLI; `main` matches on these.
// The variants are deliberately lowercase / snake_case, which is why the
// `non_camel_case_types` lint is silenced. Presumably this mirrors the names
// typed on the command line; confirm against clap's renaming rules before
// renaming any variant.
// (`//` rather than `///` so the help text generated by clap is unchanged.)
#[allow(non_camel_case_types)]
#[derive(Subcommand)]
enum MyCommand {
// Passes the FASTQ list and the output path to `phred_counter::run`.
phred(PhredArgs),
// Counts the records of each FASTQ file and writes `<file>\t<count>` lines.
count(CountArgs),
// Passes the input file, output path and score to `quality_filter`.
qcfilter(QCFilterArgs),
// Tallies (I1, I2) sequence pairs and writes them sorted by descending count.
count_sampleix(SampleIxArgs),
// Arguments are parsed, but the matching arm in `main` is currently empty.
demux_dual(DemuxDualArgs),
}
// Arguments of the `qcfilter` subcommand.
// (`//` rather than `///` so the help text generated by clap is unchanged.)
#[derive(Args)]
struct QCFilterArgs{
// Input FASTQ file; no `short`/`long` is set, so clap treats it as positional.
#[clap()]
fastq_file: String,
// Value of `-q` / `--qcscore`, forwarded as the third argument of
// `quality_filter`. NOTE(review): presumably a quality threshold; the exact
// semantics live in `rustfastq::io::quality_filter` and are not visible here.
#[clap(short = 'q', long= "qcscore")]
qcscore: f32,
}
// Arguments of the `count_sampleix` subcommand; both lists are handed to
// `paired_index_counter`, which reads them pairwise.
// (`//` rather than `///` so the help text generated by clap is unchanged.)
#[derive(Args)]
struct SampleIxArgs{
// FASTQ files given via `--i1`; their sequences form the first half of each counted pair.
#[clap(long= "i1")]
i1_list: Vec<String>,
// FASTQ files given via `--i2`; their sequences form the second half of each counted pair.
#[clap(long= "i2")]
i2_list: Vec<String>,
}
// Arguments of the `demux_dual` subcommand.
// None of these fields is read yet: the `demux_dual` arm in `main` is empty.
// (`//` rather than `///` so the help text generated by clap is unchanged.)
#[derive(Args)]
struct DemuxDualArgs{
// Files given via `--i1` (presumably index-1 FASTQ files, as in `SampleIxArgs`; confirm once implemented).
#[clap(long= "i1")]
i1_list: Vec<String>,
// Files given via `--i2` (presumably index-2 FASTQ files; confirm once implemented).
#[clap(long= "i2")]
i2_list: Vec<String>,
// Files given via `--r1` (presumably read-1 FASTQ files; confirm once implemented).
#[clap(long= "r1")]
r1_list: Vec<String>,
// Files given via `--r2` (presumably read-2 FASTQ files; confirm once implemented).
#[clap(long= "r2")]
r2_list: Vec<String>,
// Path given via `--samplesheet`; its expected format is not defined in this file.
#[clap(long= "samplesheet")]
samplesheet: PathBuf,
}
// Arguments of the `count` subcommand.
// (`//` rather than `///` so the help text generated by clap is unchanged.)
#[derive(Args)]
struct CountArgs{
// FASTQ files to count; positional (no `short`/`long`). `main` counts each
// file separately and writes one output line per file.
#[clap()]
fastq_list: Vec<String>,
}
// Arguments of the `phred` subcommand.
// (`//` rather than `///` so the help text generated by clap is unchanged.)
#[derive(Args)]
struct PhredArgs{
// FASTQ files, positional (no `short`/`long`); passed as a whole to `phred_counter::run`.
#[clap()]
fastq_list: Vec<String>,
}
// Argument set that no `MyCommand` variant in this file refers to.
// NOTE(review): looks like a leftover from an earlier or planned subcommand;
// confirm before removing.
// (`//` rather than `///` so the help text generated by clap is unchanged.)
#[derive(Args)]
struct FastqArgs{
// Value of `-w` / `--whitelist` (presumably a path to a whitelist file; not used in this file).
#[clap(short = 'w', long= "whitelist")]
whitelist: String,
// Value of `-n` / `--ntop`; note the field is called `topn` while the flag is `ntop`.
#[clap(short = 'n', long= "ntop")]
topn: usize,
// Positional list of FASTQ files.
#[clap()]
fastq_list: Vec<String>,
}
/// Entry point: parses the command line and runs the selected subcommand.
///
/// * `phred` – hands the FASTQ list and the output path to `phred_counter::run`.
/// * `count` – counts the records of each FASTQ file and writes one
///   `<filename>\t<count>` line per file to the output file.
/// * `qcfilter` – forwards the input file, output path and score to `quality_filter`.
/// * `count_sampleix` – tallies (I1, I2) sequence pairs and writes them as
///   `i1,i2,count` lines, most frequent pair first.
/// * `demux_dual` – not implemented yet; only prints a notice to stderr.
///
/// # Panics
/// Panics if the output file cannot be created, written or flushed.
fn main() {
    let cli = Cli::parse();
    match cli.command {
        MyCommand::phred(args) => {
            println!("Doing Phred Counter");
            phred_counter::run(&args.fastq_list, cli.output)
        }
        MyCommand::count(args) => {
            println!("Doing counting");
            let mut file_handle =
                File::create(cli.output).expect("failed to create the output file");
            for filename in args.fastq_list {
                println!("Counting {}", filename);
                let now = Instant::now();
                // `count_fastq_reads` takes ownership, but the name is still
                // needed for the report below, hence the single clone.
                let c = count_fastq_reads(filename.clone());
                let elapsed_time = now.elapsed();
                println!(
                    "Counted {}, took {} minutes.",
                    filename,
                    elapsed_time.as_secs() / 60
                );
                file_handle
                    .write_all(format!("{}\t{}\n", filename, c).as_bytes())
                    .expect("failed to write to the output file");
            }
        }
        MyCommand::qcfilter(args) => {
            quality_filter(&args.fastq_file, &cli.output, args.qcscore)
        }
        MyCommand::count_sampleix(args) => {
            let count_map = paired_index_counter(args.i1_list, args.i2_list);
            let mut count_vec: Vec<((String, String), usize)> = count_map.into_iter().collect();
            // Most frequent index pair first.
            count_vec.sort_by(|a, b| b.1.cmp(&a.1));
            let mut fh = BufWriter::new(
                File::create(cli.output).expect("failed to create the output file"),
            );
            for ((s1, s2), c) in &count_vec {
                writeln!(fh, "{},{},{}", s1, s2, c).expect("failed to write to the output file");
            }
            // Dropping a `BufWriter` discards flush errors; flush explicitly so
            // a truncated output file does not go unnoticed.
            fh.flush().expect("failed to flush the output file");
        }
        MyCommand::demux_dual(_args) => {
            // Tell the user instead of silently succeeding without output.
            eprintln!("demux_dual is not implemented yet; nothing was done");
        }
    };
}
/// Returns the number of records that `rustfastq::io::fastq_list_iter`
/// yields for the single file `filename`.
pub fn count_fastq_reads(filename: String) -> usize {
    // The iterator takes a list of files, so wrap the one name in an array.
    rustfastq::io::fastq_list_iter(&[filename]).count()
}
use std::collections::HashMap;
use itertools::izip;
/// Counts how often each (I1, I2) sequence pair occurs.
///
/// The records of `i1_list` and `i2_list` are read in lockstep: the n-th
/// record of the first list is paired with the n-th record of the second.
/// Iteration stops as soon as either side is exhausted, so surplus records
/// on the longer side are not counted.
///
/// Returns a map from `(i1_sequence, i2_sequence)` to its number of
/// occurrences. A spinner from `get_spinner` is advanced once per million
/// processed pairs.
pub fn paired_index_counter(i1_list: Vec<String>, i2_list: Vec<String>) -> HashMap<(String, String), usize> {
    let i1 = rustfastq::io::fastq_list_iter(&i1_list);
    let i2 = rustfastq::io::fastq_list_iter(&i2_list);
    let mut counter: HashMap<(String, String), usize> = HashMap::new();
    let bar = get_spinner();
    for (i, (f1, f2)) in izip!(i1, i2).enumerate() {
        *counter.entry((f1.seq, f2.seq)).or_insert(0) += 1;
        // `i` is zero-based: test `i + 1` so the spinner advances only after a
        // full million pairs has been processed, not already on the first one.
        if (i + 1) % 1_000_000 == 0 {
            bar.inc(1_000_000);
        }
    }
    counter
}
/// Manual smoke test: counts index pairs of one sequencing run and prints
/// the 20 most frequent ones.
///
/// The input files live under a developer-specific mount, so the test is
/// ignored by default; run it with `cargo test -- --ignored` on a machine
/// that has the data.
#[test]
#[ignore = "requires the FASTQ files under /home/michi/mounts/myDrive"]
fn test_paired() {
    let count_map = paired_index_counter(
        vec!["/home/michi/mounts/myDrive/230601_VH00715_118_AACVG5JM5_fastq/Undetermined_S0_L001_I1_001.fastq.gz".to_string()],
        vec!["/home/michi/mounts/myDrive/230601_VH00715_118_AACVG5JM5_fastq/Undetermined_S0_L001_I2_001.fastq.gz".to_string()],
    );
    let mut count_vec: Vec<((String, String), usize)> = count_map.into_iter().collect();
    // Most frequent index pair first.
    count_vec.sort_by(|a, b| b.1.cmp(&a.1));
    for ((s1, s2), c) in count_vec.iter().take(20) {
        println!("{}_{}:{}", s1, s2, c)
    }
}
/// Manual check of read-group parsing: prints every `@RG` line of
/// `/tmp/header.sam` together with the result of `parse_rg_line`.
///
/// The fixture is not part of the repository, so the test is ignored by
/// default; run it with `cargo test -- --ignored` once the file exists.
#[test]
#[ignore = "requires a SAM header at /tmp/header.sam"]
#[allow(non_snake_case)] // `RG` mirrors the `@RG` tag the test is about
fn test_bamfile_RG() {
    use regex::Regex;
    use std::io::{BufRead, BufReader};
    use std::str::FromStr;

    /// Parses one tab-separated `@RG` line into `(id, (read_group, lane))`.
    ///
    /// The value of the `ID` tag is split at its last `:` into read group
    /// and lane. The lane is either a plain number or has the form
    /// `<number>-<uppercase hex>`, in which case only the number is kept.
    /// Returns `None` for anything that does not fit: a tag without `:`,
    /// a missing `ID` tag, an `ID` without `:`, or an unparsable lane.
    fn parse_rg_line(line: &str) -> Option<(String, (String, u32))> {
        let mut entries = line.split('\t');
        entries.next()?; // skip the leading `@RG` field
        // Collecting into `Option<HashMap>` yields `None` on the first tag
        // without a `:` instead of panicking.
        let tags = entries
            .map(|entry| entry.split_once(':'))
            .collect::<Option<HashMap<_, _>>>()?;
        println!("tags: {:?}", tags);
        let v = *tags.get("ID")?;
        let (rg, lane) = v.rsplit_once(':')?;
        println!("rg:{:?} lane:{:?}", rg, lane);
        let parsed = u32::from_str(lane);
        println!("{:?}", parsed);
        let lane_u32 = match parsed {
            Ok(n) => n,
            Err(_) => {
                // Only reached for non-numeric lanes, so the pattern is not
                // compiled for the common case.
                let re = Regex::new(r"^([0-9]+)-[0-9A-F]+$").expect("static regex is valid");
                let cap = re.captures(lane)?;
                // The digits may still overflow `u32`; treat that as unparsable.
                u32::from_str(cap.get(1)?.as_str()).ok()?
            }
        };
        let result = Some((v.to_string(), (rg.to_string(), lane_u32)));
        println!("Final return: {:?}", result);
        result
    }

    let fh = BufReader::new(File::open("/tmp/header.sam").unwrap());
    for line in fh
        .lines()
        .map(|r| r.unwrap())
        .filter(|l| l.starts_with("@RG"))
    {
        println!("Line: {}", line);
        let y = parse_rg_line(&line);
        println!("Parse: {:?}", y);
    }
}