use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;
#[derive(Args, Debug)]
pub struct MergefaArgs {
#[arg(value_name = "in1.fa")]
pub input1: String,
#[arg(value_name = "in2.fa")]
pub input2: String,
#[arg(short = 'q', value_name = "INT", default_value = "0")]
pub qual_threshold: u8,
#[arg(short = 'i')]
pub intersect: bool,
#[arg(short = 'm')]
pub mask: bool,
#[arg(short = 'r')]
pub rand_het: bool,
#[arg(short = 's')]
pub suppress_het: bool,
}
const NT16_REV: [u8; 16] = [
b'=', b'A', b'C', b'M', b'G', b'R', b'S', b'V', b'T', b'W', b'Y', b'H', b'K', b'D', b'B', b'N',
];
pub fn run(args: &MergefaArgs) -> Result<()> {
if args.mask && args.intersect {
anyhow::bail!("'-i' and '-m' cannot be applied at the same time");
}
let mut reader1 = if args.input1 == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input1)?
};
let mut reader2 = SeqReader::from_path(&args.input2)?;
let mut record1 = SeqRecord::new(Vec::new(), Vec::new());
let mut record2 = SeqRecord::new(Vec::new(), Vec::new());
let mut cnt = [0u64; 5];
while reader1.read_next(&mut record1)? {
if !reader2.read_next(&mut record2)? {
break;
}
if record1.name != record2.name {
eprintln!(
"不同的序列名称: {} != {}",
String::from_utf8_lossy(&record1.name),
String::from_utf8_lossy(&record2.name)
);
}
if record1.seq.len() != record2.seq.len() {
eprintln!(
"不等的序列长度: {} != {}",
record1.seq.len(),
record2.seq.len()
);
}
let min_l = record1.seq.len().min(record2.seq.len());
print!(">{}", String::from_utf8_lossy(&record1.name));
for i in 0..min_l {
let c = [record1.seq[i], record2.seq[i]];
let is_upper = if args.qual_threshold > 0 {
c[0].is_ascii_uppercase() && c[1].is_ascii_uppercase()
} else {
c[0].is_ascii_uppercase() && c[1].is_ascii_uppercase()
};
let mut c_nt16 = [
LOOKUP_TABLES.nt16[c[0] as usize],
LOOKUP_TABLES.nt16[c[1] as usize],
];
if c_nt16[0] == 0 {
c_nt16[0] = 15;
}
if c_nt16[1] == 0 {
c_nt16[1] = 15;
}
let b = [
LOOKUP_TABLES.bitcnt[c_nt16[0] as usize],
LOOKUP_TABLES.bitcnt[c_nt16[1] as usize],
];
let mut is_upper_final = is_upper;
if is_upper {
if b[0] == 1 && b[1] == 1 {
if c_nt16[0] == c_nt16[1] {
cnt[0] += 1;
} else {
cnt[1] += 1;
}
} else if b[0] == 1 && b[1] == 2 {
cnt[2] += 1;
} else if b[0] == 2 && b[1] == 1 {
cnt[3] += 1;
} else if b[0] == 2 && b[1] == 2 {
cnt[4] += 1;
}
}
if args.suppress_het && (b[0] > 1 || b[1] > 1) {
is_upper_final = false;
}
if args.intersect {
c_nt16[0] &= c_nt16[1];
if c_nt16[0] == 0 {
is_upper_final = false;
}
} else if args.mask {
if c_nt16[0] == 15 || c_nt16[1] == 15 {
is_upper_final = false;
}
c_nt16[0] &= c_nt16[1];
if c_nt16[0] == 0 {
is_upper_final = false;
}
} else if args.rand_het {
use rand::Rng;
let mut rng = rand::rng();
if b[0] == 1 && b[1] == 1 {
c_nt16[0] |= c_nt16[1];
} else if ((b[0] == 1 && b[1] == 2) || (b[0] == 2 && b[1] == 1))
&& (c_nt16[0] & c_nt16[1]) != 0
{
c_nt16[0] = if rng.random::<bool>() {
c_nt16[0] & c_nt16[1]
} else {
c_nt16[0] | c_nt16[1]
};
} else if b[0] == 2 && b[1] == 2 && c_nt16[0] == c_nt16[1] {
if rng.random::<bool>() {
if rng.random::<bool>() {
for bit in [8, 4, 2, 1] {
if c_nt16[0] & bit != 0 {
c_nt16[0] &= bit;
break;
}
}
} else {
for bit in [1, 2, 4, 8] {
if c_nt16[0] & bit != 0 {
c_nt16[0] &= bit;
break;
}
}
}
}
} else {
is_upper_final = false;
}
} else {
c_nt16[0] |= c_nt16[1];
}
let mut out_base = NT16_REV[c_nt16[0] as usize];
if !is_upper_final {
out_base = out_base.to_ascii_lowercase();
}
if i % 60 == 0 {
println!();
}
print!("{}", out_base as char);
}
println!();
}
eprintln!(
"(same,diff,hom-het,het-hom,het-het)=({},{},{},{},{})",
cnt[0], cnt[1], cnt[2], cnt[3], cnt[4]
);
Ok(())
}