use crate::{
common::parser::{jump_newline, parse_residue, parse_right},
types::{
AminoAcid, Chain, ModifiedAminoAcidTable, ModifiedNucleotideTable, Nucleotide, ParseFw3,
Residue,
},
};
use nom::{
bytes::complete::take,
character::complete::{anychar, line_ending, not_line_ending},
combinator::map,
IResult,
};
/// Stateless namespace type for the `SEQRES` record parsing routines.
///
/// It carries no data; all functionality is exposed as associated functions
/// that take the input bytes explicitly.
pub struct SeqResParser;
impl SeqResParser {
    /// Parses every chain found in `inp` and returns the collected sequences.
    ///
    /// `parse_chain` is called repeatedly until no input bytes remain. Each
    /// call appends one chain to either the amino-acid list or the nucleotide
    /// list. `modified_aa` and `modified_nuc` are forwarded unchanged to
    /// `parse_chain`.
    ///
    /// The "remaining input" slot of the returned `IResult` is always `()`;
    /// the payload is the pair `(amino_acid_chains, nucleotide_chains)`.
    ///
    /// # Panics
    ///
    /// Panics if `parse_chain` fails on any chain, because its result is
    /// unwrapped rather than propagated.
    pub fn parse<'a>(
        inp: &'a [u8],
        modified_aa: &ModifiedAminoAcidTable,
        modified_nuc: &ModifiedNucleotideTable,
    ) -> IResult<(), (Vec<Chain<AminoAcid>>, Vec<Chain<Nucleotide>>)> {
        let mut amino_acid_chains = Vec::new();
        let mut nucleotide_chains = Vec::new();
        let mut remaining = inp;

        while !remaining.is_empty() {
            // Each successful call consumes exactly one chain from the input.
            remaining = Self::parse_chain(
                remaining,
                modified_aa,
                modified_nuc,
                &mut amino_acid_chains,
                &mut nucleotide_chains,
            )
            .unwrap()
            .0;
        }

        Ok(((), (amino_acid_chains, nucleotide_chains)))
    }
}
impl SeqResParser {
pub fn parse_chain<'a>(
inp: &'a [u8],
modified_aa: &ModifiedAminoAcidTable,
modified_nuc: &ModifiedNucleotideTable,
chains_aa: &mut Vec<Chain<AminoAcid>>,
chains_nuc: &mut Vec<Chain<Nucleotide>>,
) -> IResult<&'a [u8], ()> {
let inp = &inp[5..]; let (inp, chain) = anychar(inp)?; let inp = &inp[1..]; let (inp, n) = parse_right::<u32>(inp, 4)?; let inp = &inp[2..]; let lines = n / 13u32;
let last_line_items = n % 13u32;
let mut inp = inp;
let first_res = &inp[..3];
match parse_residue(inp, modified_aa, modified_nuc)?.1 {
Residue::AminoAcid(_) => {
let mut aas: Vec<AminoAcid> = Vec::new();
for _i in 0..lines {
for _j in 0..13 {
let (inp1, res) = map(take(3usize), AminoAcid::parse_fw3)(inp)?;
aas.push(res);
inp = take(1usize)(inp1)?.0;
}
inp = jump_newline(inp)?.0;
inp = take(13usize)(inp)?.0;
}
for _i in 0..last_line_items {
let (inp1, res) = map(take(3usize), AminoAcid::parse_fw3)(inp)?;
aas.push(res);
inp = take(1usize)(inp1)?.0;
}
inp = jump_newline(inp)?.0;
chains_aa.push(Chain {
id: chain,
seq: aas,
});
return Ok((inp, ()));
}
Residue::Nucleotide(_) => {
let mut nucs: Vec<Nucleotide> = Vec::new();
for _i in 0..lines {
for _j in 0..13 {
let (inp1, res) = map(take(3usize), Nucleotide::parse_fw3)(inp)?;
nucs.push(res);
inp = take(1usize)(inp1)?.0;
}
inp = jump_newline(inp)?.0;
inp = take(13usize)(inp)?.0;
}
for _i in 0..last_line_items {
let (inp1, res) = map(take(3usize), Nucleotide::parse_fw3)(inp)?;
nucs.push(res);
inp = take(1usize)(inp1)?.0;
}
inp = jump_newline(inp)?.0;
chains_nuc.push(Chain {
id: chain,
seq: nucs,
});
return Ok((inp, ()));
}
_ => panic!(format!("Invalid residue in chain: {}", unsafe {
std::str::from_utf8_unchecked(first_res)
})),
}
}
pub fn buffer_seqres<'a>(inp: &'a [u8], buffer: &mut Vec<u8>) -> IResult<&'a [u8], ()> {
let (inp, first_line) = not_line_ending(inp)?;
let (mut inp, _) = line_ending(inp)?;
buffer.extend_from_slice(&first_line);
buffer.push(b'\n');
while inp[..6] == b"SEQRES"[..] {
let (new_inp, ln) = not_line_ending(inp)?;
let (new_inp, _) = line_ending(new_inp)?;
buffer.extend_from_slice(&ln[6..]);
buffer.push(b'\n');
inp = new_inp;
}
Ok((inp, ()))
}
}