// kira-mmcif 0.2.0
//
// Low-level, streaming mmCIF/BinaryCIF parser focused on protein coordinates.
use crate::model::{AtomName, ChainId, Structure};

/// Flattened view of a single model: atom columns plus residue and chain records.
///
/// The `TryFrom<&Structure>` impl in this file builds it by walking the model's
/// chains, then each chain's residues, then each residue's atoms, in order.
#[derive(Debug, Clone, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ProteinIR {
    /// Per-atom columns, one entry per atom in visiting order.
    pub atoms: AtomSoA,
    /// One record per residue, in visiting order; `AtomSoA::residue_idx` indexes into this.
    pub residues: Vec<ResidueIR>,
    /// One record per chain, each holding the range of `residues` it covers.
    pub chains: Vec<ChainIR>,
}

/// Per-atom data stored as parallel columns (structure of arrays).
///
/// The `TryFrom<&Structure>` conversion pushes exactly one entry onto every
/// column for each atom, so index `i` refers to the same atom in all of them.
/// The fields are public, so code that edits them directly must keep the
/// columns the same length itself.
#[derive(Debug, Clone, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AtomSoA {
    /// Atom x coordinates.
    pub x: Vec<f32>,
    /// Atom y coordinates.
    pub y: Vec<f32>,
    /// Atom z coordinates.
    pub z: Vec<f32>,
    /// Index into `ProteinIR::residues` of the residue owning each atom.
    pub residue_idx: Vec<u32>,
    /// Atom name code, as returned by `AtomName::as_u8()`.
    pub atom_kind: Vec<u8>,
}

/// One residue: its identity plus the contiguous run of atoms it owns in [`AtomSoA`].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ResidueIR {
    /// Id of the chain this residue was read from.
    pub chain_id: ChainId,
    /// Residue name code, as returned by `as_u8()` on the source residue's name.
    pub residue_name: u8,
    /// Original `label_seq_id`. `i32::MIN` reserved for missing (`.`).
    pub residue_number: i32,
    /// Index of this residue's first atom in the `AtomSoA` columns.
    pub atom_offset: u32,
    /// Number of atoms belonging to this residue, starting at `atom_offset`.
    pub atom_count: u32,
    /// `true` if the residue contains at least one atom named `AtomName::N`.
    pub has_n: bool,
    /// `true` if the residue contains at least one atom named `AtomName::CA`.
    pub has_ca: bool,
    /// `true` if the residue contains at least one atom named `AtomName::C`.
    pub has_c: bool,
    /// `true` if the residue contains at least one atom named `AtomName::O`.
    pub has_o: bool,
}

/// Half-open residue range `[residue_start, residue_end)` for a chain.
///
/// Both bounds are indices into `ProteinIR::residues`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ChainIR {
    /// Id copied from the source chain.
    pub chain_id: ChainId,
    /// Index of the chain's first residue (inclusive).
    pub residue_start: u32,
    /// Index one past the chain's last residue (exclusive).
    pub residue_end: u32,
}

impl ChainIR {
    /// Number of residues covered by `[residue_start, residue_end)`.
    ///
    /// The subtraction saturates, so an inverted range
    /// (`residue_end < residue_start`) reports `0` instead of wrapping.
    pub fn residue_count(&self) -> u32 {
        let Self {
            residue_start: first,
            residue_end: past_last,
            ..
        } = self;
        past_last.saturating_sub(*first)
    }
}

impl TryFrom<&Structure> for ProteinIR {
    type Error = crate::error::MmCifError;

    fn try_from(structure: &Structure) -> Result<Self, Self::Error> {
        if structure.models.len() != 1 {
            return Err(crate::error::MmCifError::InvalidModelCount(
                structure.models.len(),
            ));
        }
        let model = &structure.models[0];
        let mut atoms = AtomSoA::default();
        let mut residues = Vec::new();
        let mut chains = Vec::new();

        for chain in &model.chains {
            let chain_id = chain.id;
            let residue_start = residues.len() as u32;

            for residue in &chain.residues {
                let atom_offset = atoms.x.len() as u32;
                let mut has_n = false;
                let mut has_ca = false;
                let mut has_c = false;
                let mut has_o = false;

                for atom in &residue.atoms {
                    atoms.x.push(atom.x);
                    atoms.y.push(atom.y);
                    atoms.z.push(atom.z);
                    atoms.residue_idx.push(residues.len() as u32);
                    atoms.atom_kind.push(atom.name.as_u8());

                    match atom.name {
                        AtomName::N => has_n = true,
                        AtomName::CA => has_ca = true,
                        AtomName::C => has_c = true,
                        AtomName::O => has_o = true,
                    }
                }

                let atom_count = atoms.x.len() as u32 - atom_offset;
                residues.push(ResidueIR {
                    chain_id,
                    residue_name: residue.name.as_u8(),
                    residue_number: residue.seq_id.unwrap_or(i32::MIN),
                    atom_offset,
                    atom_count,
                    has_n,
                    has_ca,
                    has_c,
                    has_o,
                });
            }

            chains.push(ChainIR {
                chain_id,
                residue_start,
                residue_end: residues.len() as u32,
            });
        }

        Ok(Self {
            atoms,
            residues,
            chains,
        })
    }
}