use anyhow::bail;
use clap::{Parser, ValueEnum};
use libradicl::record::{
AlevinFryReadRecord, AlevinFryRecordContext, PiscemBulkReadRecord, PiscemBulkRecordContext,
};
use needletail::bitkmer::*;
use std::io;
use std::io::{BufReader, Write};
use tracing::error;
#[derive(Clone, Debug, PartialEq, ValueEnum)]
pub enum RadFileType {
Bulk,
SingleCell,
Unknown,
}
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
pub struct ViewOpts {
#[arg(short, long, required = true)]
input: std::path::PathBuf,
#[arg(short, long)]
output: Option<std::path::PathBuf>,
#[arg(short, long)]
rad_type: RadFileType,
#[arg(long)]
use_ref_name: bool,
#[arg(long)]
no_header: bool,
#[arg(long)]
max_chunks: Option<usize>,
}
pub struct ExtraRecordInfo<'a> {
pub bc_len: usize,
pub umi_len: usize,
pub use_ref_name: bool,
pub prelude: &'a libradicl::header::RadPrelude,
pub max_chunks: Option<usize>,
}
impl<'a> ExtraRecordInfo<'a> {
pub fn ref_name(&self, i: usize) -> &str {
&self.prelude.hdr.ref_names[i]
}
}
pub trait WriteMappingRecord {
fn write_records(
&self,
ctx: &ExtraRecordInfo,
output_stream: &mut Box<dyn Write>,
) -> anyhow::Result<()>;
}
impl WriteMappingRecord for libradicl::record::PiscemBulkReadRecord {
fn write_records(
&self,
ctx: &ExtraRecordInfo,
output_stream: &mut Box<dyn Write>,
) -> anyhow::Result<()> {
writeln!(output_stream, "{{")?;
writeln!(
output_stream,
" \"frag_type\" : \"{:?}\",",
libradicl::rad_types::MappingType::from_u8(self.frag_type)
)?;
writeln!(output_stream, " \"alns\" : [")?;
for i in 0..(self.refs.len()) {
if ctx.use_ref_name {
write!(
output_stream,
" {{\"ref\": {:?}, \"dir\": \"{:?}\", \"pos\": {}, \"flen\": {} }}",
ctx.ref_name(self.refs[i] as usize),
self.dirs[i],
self.positions[i],
self.frag_lengths[i]
)?;
} else {
write!(
output_stream,
" {{\"ref\": {}, \"dir\": \"{:?}\", \"pos\": {}, \"flen\": {} }}",
self.refs[i], self.dirs[i], self.positions[i], self.frag_lengths[i]
)?;
}
if i < self.refs.len() - 1 {
writeln!(output_stream, ",")?;
} else {
writeln!(output_stream)?;
}
}
writeln!(output_stream, " ]")?;
write!(output_stream, "}}")?;
Ok(())
}
}
impl WriteMappingRecord for libradicl::record::AlevinFryReadRecord {
fn write_records(
&self,
ctx: &ExtraRecordInfo,
output_stream: &mut Box<dyn Write>,
) -> anyhow::Result<()> {
let bc_mer: BitKmer = (self.bc, ctx.bc_len as u8);
let umi_mer: BitKmer = (self.umi, ctx.umi_len as u8);
writeln!(output_stream, "{{")?;
writeln!(
output_stream,
" \"barcode\" : {:?},\n \"umi\" : {:?},",
unsafe { std::str::from_utf8_unchecked(&bitmer_to_bytes(bc_mer)[..]) },
unsafe { std::str::from_utf8_unchecked(&bitmer_to_bytes(umi_mer)[..]) },
)?;
writeln!(output_stream, " \"alns\" : [")?;
for i in 0..(self.refs.len()) {
if ctx.use_ref_name {
write!(
output_stream,
" {{\"ref\": {:?}, \"dir\": \"{}\" }}",
ctx.ref_name(self.refs[i] as usize),
if self.dirs[i] { "fw" } else { "rc" }
)?;
} else {
write!(
output_stream,
" {{\"ref\": {}, \"dir\": \"{}\" }}",
self.refs[i],
if self.dirs[i] { "fw" } else { "rc" }
)?;
}
if i < self.refs.len() - 1 {
writeln!(output_stream, ",")?;
} else {
writeln!(output_stream)?;
}
}
writeln!(output_stream, " ]")?;
write!(output_stream, "}}")?;
Ok(())
}
}
pub fn write_records<
RecordContext: std::fmt::Debug + Clone + libradicl::record::RecordContext,
RecordType: std::fmt::Debug
+ libradicl::record::MappedRecord<ParsingContext = RecordContext>
+ WriteMappingRecord,
R: std::io::BufRead,
>(
prelude: &libradicl::header::RadPrelude,
extra_record_info: &ExtraRecordInfo,
ifile: &mut R,
output_stream: &mut Box<dyn Write>,
) -> anyhow::Result<()> {
let tag_context = prelude.get_record_context::<RecordContext>()?;
let total_chunks = if prelude.hdr.num_chunks > 0 {
prelude.hdr.num_chunks as usize
} else {
usize::MAX - 1
};
let mut chunk_num = 0;
let num_chunks = extra_record_info
.max_chunks
.unwrap_or(total_chunks)
.min(total_chunks);
while chunk_num < num_chunks
&& libradicl::utils::has_data_left(ifile).expect("encountered error reading input file")
{
let chunk = libradicl::chunk::Chunk::<RecordType>::from_bytes(ifile, &tag_context);
let nreads = chunk.reads.len();
for (rnum, r) in chunk.reads.iter().enumerate() {
r.write_records(extra_record_info, output_stream)?;
if (chunk_num == num_chunks - 1) && (rnum == nreads - 1) {
writeln!(output_stream)?;
} else {
writeln!(output_stream, ",")?;
}
}
chunk_num += 1;
}
Ok(())
}
pub fn write_header(
prelude: &libradicl::header::RadPrelude,
file_tag_map: &libradicl::rad_types::TagMap,
output_stream: &mut Box<dyn Write>,
) -> anyhow::Result<()> {
writeln!(output_stream, "\"rad_header\" : {{")?;
writeln!(output_stream, "\"is_paired\" : {},", prelude.hdr.is_paired)?;
writeln!(output_stream, "\"ref_count\" : {},", prelude.hdr.ref_count)?;
writeln!(output_stream, "\"refs\" : [")?;
for (i, rn) in prelude.hdr.ref_names.iter().enumerate() {
write!(output_stream, "\"{}\"", rn)?;
if i < (prelude.hdr.ref_count - 1) as usize {
writeln!(output_stream, ",")?;
}
}
writeln!(output_stream, "],")?;
writeln!(output_stream, "\"num_chunks\" : {}", prelude.hdr.num_chunks)?;
writeln!(output_stream, "}},")?;
writeln!(output_stream, "\"tag_descriptions\" : {{")?;
writeln!(output_stream, " \"file_tag_desc\" : {{")?;
writeln!(
output_stream,
" \"label\" : \"{:?}\",",
prelude.file_tags.label
)?;
writeln!(output_stream, " \"tag_desc\" : [")?;
for (i, td) in prelude.file_tags.tags.iter().enumerate() {
writeln!(output_stream, " {{\n \"name\" : \"{}\",", td.name)?;
write!(output_stream, " \"desc\" : \"{:?}\"\n }}", td.typeid)?;
if i < (prelude.file_tags.tags.len() - 1) {
writeln!(output_stream, ",")?;
} else {
writeln!(output_stream)?;
}
}
writeln!(output_stream, " ]")?;
writeln!(output_stream, " }},")?;
writeln!(output_stream, " \"read_tag_desc\" : {{")?;
writeln!(
output_stream,
" \"label\" : \"{:?}\",",
prelude.read_tags.label
)?;
writeln!(output_stream, " \"tag_desc\" : [")?;
for (i, td) in prelude.read_tags.tags.iter().enumerate() {
writeln!(output_stream, " {{\n \"name\" : \"{}\",", td.name)?;
write!(output_stream, " \"desc\" : \"{:?}\"\n }}", td.typeid)?;
if i < (prelude.read_tags.tags.len() - 1) {
writeln!(output_stream, ",")?;
} else {
writeln!(output_stream)?;
}
}
writeln!(output_stream, " ]")?;
writeln!(output_stream, " }},")?;
writeln!(output_stream, " \"aln_tag_desc\" : {{")?;
writeln!(
output_stream,
" \"label\" : \"{:?}\",",
prelude.aln_tags.label
)?;
writeln!(output_stream, " \"tag_desc\" : [")?;
for (i, td) in prelude.aln_tags.tags.iter().enumerate() {
writeln!(output_stream, " {{\n \"name\" : \"{}\",", td.name)?;
write!(output_stream, " \"desc\" : \"{:?}\"\n }}", td.typeid)?;
if i < (prelude.aln_tags.tags.len() - 1) {
writeln!(output_stream, ",")?;
} else {
writeln!(output_stream)?;
}
}
writeln!(output_stream, " ]")?;
writeln!(output_stream, " }}")?;
writeln!(output_stream, "}},")?;
writeln!(output_stream, "\"file_tags\" : [")?;
let nft = prelude.file_tags.tags.len();
for (i, td) in prelude.file_tags.tags.iter().enumerate() {
if let Some(tv) = file_tag_map.get(&td.name) {
write!(
output_stream,
"{{ \"name\" : \"{}\", \"val\" : \"{:?}\" }}",
&td.name, tv
)?;
if i == nft - 1 {
writeln!(output_stream)?;
} else {
writeln!(output_stream, ",")?;
}
}
}
writeln!(output_stream, "],")?;
Ok(())
}
pub fn view(view_opts: &ViewOpts) -> anyhow::Result<()> {
if view_opts.rad_type == RadFileType::Unknown {
error!("Unknown file type not yet supported");
bail!("Unknown file type not yet supported");
}
let mut output_stream: Box<dyn Write> = match view_opts.output {
Some(ref path) => std::fs::OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(path)
.map(|f| Box::new(std::io::BufWriter::new(f)) as Box<dyn Write>)?,
None => Box::new(io::stdout()),
};
let f = std::fs::File::open(&view_opts.input)?;
let mut ifile = BufReader::new(f);
let prelude = libradicl::header::RadPrelude::from_bytes(&mut ifile)?;
let file_tag_map = prelude.file_tags.try_parse_tags_from_bytes(&mut ifile)?;
writeln!(output_stream, "{{")?;
if !view_opts.no_header {
write_header(&prelude, &file_tag_map, &mut output_stream)?;
}
let mut extra_record_info = ExtraRecordInfo {
bc_len: 0,
umi_len: 0,
use_ref_name: view_opts.use_ref_name,
prelude: &prelude,
max_chunks: view_opts.max_chunks,
};
writeln!(output_stream, "\"mapped_records\" : [")?;
match view_opts.rad_type {
RadFileType::Bulk => {
write_records::<PiscemBulkRecordContext, PiscemBulkReadRecord, BufReader<std::fs::File>>(
&prelude,
&extra_record_info,
&mut ifile,
&mut output_stream,
)?;
}
RadFileType::SingleCell => {
let cblen: u64 = file_tag_map
.get("cblen")
.expect("tag map must contain \"cblen\" value")
.try_into()?;
let ulen: u64 = file_tag_map
.get("ulen")
.expect("tag map must contain \"ulen\" value")
.try_into()?;
extra_record_info.bc_len = cblen as usize;
extra_record_info.umi_len = ulen as usize;
write_records::<AlevinFryRecordContext, AlevinFryReadRecord, BufReader<std::fs::File>>(
&prelude,
&extra_record_info,
&mut ifile,
&mut output_stream,
)?;
}
RadFileType::Unknown => bail!("Unknown RadFileType not supported yet"),
}
writeln!(output_stream, "]")?;
writeln!(output_stream, "}}")?;
Ok(())
}