use std::fs::File;
use std::io::{stdin, BufRead, BufReader, Cursor};
use std::path::Path;
use anyhow::{anyhow, Result};
use paste::paste;
use ff_structure::DotBracketVec;
use ff_energy::NucleotideVec;
/// Validation policy that `parse_na_format` applies to the structure line.
#[derive(Clone, Copy)]
enum NAMode {
/// The structure line is optional. If present it must not be longer than
/// the sequence (shorter is accepted); if absent, the single-character
/// structure "." is returned as a placeholder.
Lenient,
/// The structure line is required and must have exactly the same length
/// as the sequence.
Strict,
}
/// Parses the first record of a FASTA-like nucleic acid input.
///
/// The expected layout is an optional header line starting with `>`, a
/// sequence line, and (depending on `mode`) a dot-bracket structure line.
/// Blank lines are skipped. On sequence and structure lines only the first
/// whitespace-separated token is used; anything after it is ignored.
/// Reading stops as soon as the structure has been parsed, or when a `>`
/// line is encountered after the sequence (that line belongs to the next
/// record and must not replace the header of the current one).
///
/// # Parameters
/// * `reader` - line-oriented input source.
/// * `mode` - how strictly the structure line is validated, see [`NAMode`].
/// * `is_rna` - selects `NucleotideVec::try_from_rna` when `true`,
///   `NucleotideVec::try_from_dna` otherwise.
///
/// # Returns
/// `(header, sequence, structure)`, where `header` is the complete trimmed
/// header line (including the leading `>`) if one was found. In lenient mode
/// a missing structure is replaced by the single-character structure ".".
///
/// # Errors
/// * an I/O error occurred while reading a line,
/// * the sequence or structure token could not be converted,
/// * no sequence line was found,
/// * strict mode: the structure is missing or its length differs from the
///   sequence length,
/// * lenient mode: the structure is longer than the sequence.
fn parse_na_format<R: BufRead>(
    reader: R,
    mode: NAMode,
    is_rna: bool,
) -> Result<(Option<String>, NucleotideVec, DotBracketVec)> {
    let mut header: Option<String> = None;
    let mut sequence: Option<NucleotideVec> = None;
    let mut structure: Option<DotBracketVec> = None;

    for line in reader.lines() {
        let line = line?;
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        if line.starts_with('>') {
            if sequence.is_some() {
                // A header after the sequence starts the next record; stop
                // here instead of attaching it to the current sequence.
                break;
            }
            header = Some(line.to_string());
            continue;
        }

        // `line` is trimmed and non-empty, so a first token always exists;
        // the fallback only keeps this path free of panics.
        let token = line.split_whitespace().next().unwrap_or(line);

        if sequence.is_none() {
            sequence = Some(if is_rna {
                NucleotideVec::try_from_rna(token)?
            } else {
                NucleotideVec::try_from_dna(token)?
            });
        } else {
            // The first data line after the sequence is the structure, and
            // it completes the record.
            structure = Some(DotBracketVec::try_from(token)?);
            break;
        }
    }

    let sequence = sequence.ok_or_else(|| anyhow!("Missing sequence line"))?;

    let structure = match (structure, mode) {
        (Some(s), NAMode::Strict) => {
            if sequence.len() != s.len() {
                return Err(anyhow!(
                    "Sequence length ({}) and structure length ({}) do not match",
                    sequence.len(),
                    s.len()
                ));
            }
            s
        }
        (None, NAMode::Strict) => return Err(anyhow!("Missing structure line")),
        (Some(s), NAMode::Lenient) => {
            // A structure shorter than the sequence is accepted in lenient mode.
            if sequence.len() < s.len() {
                return Err(anyhow!(
                    "Structure is longer than sequence ({} > {}).",
                    s.len(),
                    sequence.len()
                ));
            }
            s
        }
        (None, NAMode::Lenient) => {
            // Placeholder structure of length one, independent of the
            // sequence length.
            DotBracketVec::try_from(".")
                .expect("Failed to construct open-chain structure")
        }
    };

    Ok((header, sequence, structure))
}
/// Reads one record from `reader` using lenient validation.
///
/// The structure line may be absent (the structure "." is returned in that
/// case) or shorter than the sequence. `is_rna` selects RNA (`true`) or DNA
/// (`false`) parsing of the sequence line.
///
/// # Errors
/// Forwards every error produced by `parse_na_format` in lenient mode.
pub fn read_cotr<R: BufRead>(
    reader: R,
    is_rna: bool,
) -> Result<(Option<String>, NucleotideVec, DotBracketVec)> {
    let mode = NAMode::Lenient;
    parse_na_format(reader, mode, is_rna)
}
/// Reads one record from `reader` using strict validation.
///
/// A structure line is mandatory and must have the same length as the
/// sequence. `is_rna` selects RNA (`true`) or DNA (`false`) parsing of the
/// sequence line.
///
/// # Errors
/// Forwards every error produced by `parse_na_format` in strict mode.
pub fn read_eval<R: BufRead>(
    reader: R,
    is_rna: bool,
) -> Result<(Option<String>, NucleotideVec, DotBracketVec)> {
    let mode = NAMode::Strict;
    parse_na_format(reader, mode, is_rna)
}
// Generates four convenience wrappers around a reader-based parser `$base`
// (a function taking a `BufRead` source and an `rna: bool` flag and returning
// `$ret`): `<base>_string`, `<base>_file`, `<base>_stdin` and `<base>_input`.
// `$ret` must be a `Result` whose error type can absorb `std::io::Error`,
// because `<base>_file` applies `?` to `File::open`.
macro_rules! define_input_variants {
($base:ident, $ret:ty) => {
paste! {
// Parses directly from an in-memory string.
pub fn [<$base _string>](s: &str, rna: bool) -> $ret {
$base(Cursor::new(s), rna)
}
// Opens `path` and parses from a buffered reader over the file;
// a failure to open the file is propagated to the caller.
pub fn [<$base _file>]<P: AsRef<Path>>(path: P, rna: bool) -> $ret {
let reader = BufReader::new(File::open(path)?);
$base(reader, rna)
}
// Parses from standard input through a buffered reader.
pub fn [<$base _stdin>](rna: bool) -> $ret {
let reader = BufReader::new(stdin());
$base(reader, rna)
}
// Dispatches on the argument: "-" selects standard input, any other
// value is treated as a file path.
pub fn [<$base _input>](s: &str, rna: bool) -> $ret {
if s == "-" {
[<$base _stdin>](rna)
} else {
[<$base _file>](s, rna)
}
}
}
};
}
/// Result type shared by the generated readers: optional header line,
/// sequence, and dot-bracket structure.
type NAResult = Result<(Option<String>, NucleotideVec, DotBracketVec)>;
// Generates read_cotr_string, read_cotr_file, read_cotr_stdin, read_cotr_input.
define_input_variants!(read_cotr, NAResult);
// Generates read_eval_string, read_eval_file, read_eval_stdin, read_eval_input.
define_input_variants!(read_eval, NAResult);
/// Builds a column ruler covering the positions `0..=len`.
///
/// Every position that is a multiple of ten is labelled with `position / 10`,
/// every position ending in five is marked with `,`, and all remaining
/// positions with `.`. A label wider than one character occupies the columns
/// of the positions that follow it, so those positions emit nothing.
///
/// For example, `ruler(10)` yields `"0....,....1"`.
pub fn ruler(len: usize) -> String {
    let mut out = String::new();
    // Number of upcoming positions already covered by a multi-digit label.
    let mut covered: usize = 0;

    for pos in 0..=len {
        match pos % 10 {
            0 => {
                let label = (pos / 10).to_string();
                covered = label.len() - 1;
                out.push_str(&label);
            }
            _ if covered > 0 => covered -= 1,
            5 => out.push(','),
            _ => out.push('.'),
        }
    }

    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Rulers up to one full decade: label, dots, comma at five, next label.
    #[test]
    fn test_ruler() {
        let expectations = [(0, "0"), (5, "0....,"), (10, "0....,....1")];
        for &(len, expected) in expectations.iter() {
            assert_eq!(ruler(len), expected);
        }
    }

    /// Lenient reading accepts a complete record as well as one without a
    /// structure line, for which the placeholder "." is returned.
    #[test]
    fn test_read_cotr_input() {
        let (header, sequence, structure) =
            read_cotr_string(">test\nACGU\n....\n", true).unwrap();
        assert_eq!(header, Some(">test".into()));
        assert_eq!(sequence.to_string(), "ACGU");
        assert_eq!(structure.to_string(), "....");

        let (header, sequence, structure) = read_cotr_string(">test\nACGU", true).unwrap();
        assert_eq!(header, Some(">test".into()));
        assert_eq!(sequence.to_string(), "ACGU");
        assert_eq!(structure.to_string(), ".");
    }

    /// Strict reading accepts a complete record and rejects a missing structure.
    #[test]
    fn test_read_eval_input() {
        let complete = read_eval_string(">test\nACGU\n....\n", true);
        assert!(complete.is_ok());

        let incomplete = read_eval_string(">test\nACGU\n", true);
        assert!(incomplete.is_err(), "Missing structure line should fail in strict mode");
    }
}