use super::lexitem::*;
use crate::error::*;
use crate::structs::*;
use crate::validate::*;
use crate::StrictnessLevel;
use crate::TransformationMatrix;
use std::fs::File;
use std::io::prelude::*;
pub fn open_mmcif(
filename: impl AsRef<str>,
level: StrictnessLevel,
) -> Result<(PDB, Vec<PDBError>), Vec<PDBError>> {
let filename = filename.as_ref();
let mut file = if let Ok(f) = File::open(filename) {
f
} else {
return Err(vec![PDBError::new(ErrorLevel::BreakingError, "Could not open file", "Could not open the specified file, make sure the path is correct, you have permission, and that it is not open in another program.", Context::show(filename))]);
};
let mut contents = String::new();
if let Err(e) = file.read_to_string(&mut contents) {
return Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Error while reading file",
&format!("Error: {}", e),
Context::show(filename),
)]);
}
open_mmcif_raw(&contents, level)
}
pub fn open_mmcif_raw(
input: &str,
level: StrictnessLevel,
) -> Result<(PDB, Vec<PDBError>), Vec<PDBError>> {
match super::lexer::lex_cif(input) {
Ok(data_block) => parse_mmcif(&data_block, level),
Err(e) => Err(vec![e]),
}
}
fn parse_mmcif(
input: &DataBlock,
level: StrictnessLevel,
) -> Result<(PDB, Vec<PDBError>), Vec<PDBError>> {
let mut pdb = PDB::default();
let mut errors: Vec<PDBError> = Vec::new();
let mut unit_cell = UnitCell::default();
let mut mtrix_id = None;
pdb.identifier = Some(input.name.clone());
for item in &input.items {
let result = match item {
Item::DataItem(di) => match di {
DataItem::Loop(multiple) => {
if multiple.header.contains(&"atom_site.group_PDB".to_string()) {
parse_atoms(multiple, &mut pdb)
} else {
None
}
}
DataItem::Single(single) => {
let context = Context::show(&single.name);
match &single.name[..] {
"cell.length_a" => get_f64(&single.content, &context)
.map(|n| unit_cell.set_a(n.expect("UnitCell length a should be provided")))
.err(),
"cell.length_b" => get_f64(&single.content, &context)
.map(|n| unit_cell.set_b(n.expect("UnitCell length b should be provided")))
.err(),
"cell.length_c" => get_f64(&single.content, &context)
.map(|n| unit_cell.set_c(n.expect("UnitCell length c should be provided")))
.err(),
"cell.angle_alpha" => get_f64(&single.content, &context)
.map(|n| unit_cell.set_alpha(n.expect("UnitCell angle alpha should be provided")))
.err(),
"cell.angle_beta" => get_f64(&single.content, &context)
.map(|n| unit_cell.set_beta(n.expect("UnitCell angle beta should be provided")))
.err(),
"cell.angle_gamma" => get_f64(&single.content, &context)
.map(|n| unit_cell.set_gamma(n.expect("UnitCell angle gamma should be provided")))
.err(),
"symmetry.Int_Tables_number" | "space_group.IT_number" => {
if pdb.symmetry.is_none() {
get_usize(&single.content, &context)
.map(|n| pdb.symmetry = Symmetry::from_index(n.expect("Symmetry international tables number should be provided")))
.err()
} else if let Ok(Some(value)) = get_usize(&single.content, &context) {
if pdb.symmetry != Symmetry::from_index(value) {
Some(PDBError::new(ErrorLevel::InvalidatingError, "Space group does not match", "The given space group does not match the space group earlier defined in this file.", context.clone()))
}
else {
None
}
} else {
None
}
}
"symmetry.space_group_name_H-M" | "symmetry.space_group_name_Hall" | "space_group.name_H-M_alt" | "space_group.name_Hall" => {
if pdb.symmetry.is_none() {
get_text(&single.content, &context)
.map(|t| pdb.symmetry = Symmetry::new(t.expect("Symmetry space group name should be provided")))
.err()
} else if let Ok(Some(value)) = get_text(&single.content, &context) {
if pdb.symmetry != Symmetry::new(value) {
Some(PDBError::new(ErrorLevel::InvalidatingError, "Space group does not match", "The given space group does not match the space group earlier defined in this file.", context.clone()))
}
else {
None
}
} else {
None
}
}
s if s.starts_with("atom_sites.Cartn_transf") => {
if pdb.scale.is_none() {
pdb.scale = Some(TransformationMatrix::identity());
}
#[allow(clippy::unwrap_used)]
parse_matrix(s, get_f64(&single.content, &context),pdb.scale.as_mut().unwrap(), &context)
}
s if s.starts_with("database_PDB_matrix.origx") => {
if pdb.origx.is_none() {
pdb.origx = Some(TransformationMatrix::identity());
}
#[allow(clippy::unwrap_used)]
parse_matrix(s, get_f64(&single.content, &context),pdb.origx.as_mut().unwrap(), &context)
}
s if s.starts_with("structs_ncs_oper.") => {
if s.ends_with("id") {
match get_usize(&single.content, &context) {
Err(e) => Some(e),
Ok(Some(id)) => {
mtrix_id = Some(id);
pdb.add_mtrix(MtriX::new(id, TransformationMatrix::identity(), true));
None
}
Ok(None) => Some(PDBError::new(
ErrorLevel::InvalidatingError,
"MtriX with missing ID",
"If a MtriX id is given it should be a number not a missing value.",
context.clone(),
))
}
} else {
match mtrix_id {
Some(id) => {
let mut mtrix = pdb.mtrix_mut().find(|m| m.serial_number == id).expect("Could not find the MtriX record with the previously found `_struct_ncs_oper.id`");
if s.ends_with("code") {
match get_text(&single.content, &context) {
Ok(Some(t)) if t == "given" => {mtrix.contained = true; None},
Ok(Some(t)) if t == "generate" => {mtrix.contained = false; None},
Ok(Some(_)) => Some(PDBError::new(
ErrorLevel::InvalidatingError,
"MtriX code invalid",
"Only the values 'generate' and 'given' are valid for `_struct_ncs_oper.code`.",
context.clone(),
)),
_ => Some(PDBError::new(
ErrorLevel::InvalidatingError,
"MtriX code invalid",
"The value for `_struct_ncs_oper.code` should be a textual value.",
context.clone(),
)),
}
} else if s.ends_with("details") {
None } else {
parse_matrix(s, get_f64(&single.content, &context),&mut mtrix.transformation, &context)
}
},
None => Some(PDBError::new(
ErrorLevel::InvalidatingError,
"MtriX matrix given without ID",
"The MtriX ID (`_struct_ncs_oper.id`) should be given before any matrix information is given.",
context.clone(),
))
}
}
}
_ => None,
}
.map(|e| vec![e])
}
},
_ => None,
};
if let Some(e) = result {
errors.extend(e);
}
}
if unit_cell != UnitCell::default() {
pdb.unit_cell = Some(unit_cell);
}
reshuffle_conformers(&mut pdb);
errors.extend(validate(&pdb));
if errors.iter().any(|e| e.fails(level)) {
Err(errors)
} else {
Ok((pdb, errors))
}
}
fn parse_matrix(
name: &str,
value: Result<Option<f64>, PDBError>,
matrix: &mut TransformationMatrix,
context: &Context,
) -> Option<PDBError> {
let get_index = |n| {
if let Some(c) = name.chars().nth_back(n) {
if let Some(n) = c.to_digit(10) {
Ok((n - 1) as usize)
} else {
Err(PDBError::new(
ErrorLevel::InvalidatingError,
"Matrix item definition incorrect",
"There are no indices into the matrix. For example this is a valid name: `_database_PDB_matrix.origx[1][1]`",
context.clone(),
))
}
} else {
Err(PDBError::new(
ErrorLevel::InvalidatingError,
"Matrix definition too short",
"This matrix definition item name is too short to contain the matrix indices.",
context.clone(),
))
}
};
match value {
Ok(o) => {
if let Some(value) = o {
if name.matches('[').count() == 2 {
let r = match get_index(4) {
Ok(o) => o,
Err(e) => return Some(e),
};
let c = match get_index(1) {
Ok(o) => o,
Err(e) => return Some(e),
};
matrix.matrix_mut()[r][c] = value;
} else {
let r = match get_index(1) {
Ok(o) => o,
Err(e) => return Some(e),
};
matrix.matrix_mut()[r][3] = value;
}
None } else {
None }
}
Err(e) => Some(e),
}
}
fn flatten_result<T, E>(value: Result<Result<T, E>, E>) -> Result<T, E> {
match value {
Ok(Ok(t)) => Ok(t),
Ok(Err(e)) => Err(e),
Err(e) => Err(e),
}
}
fn parse_atoms(input: &Loop, pdb: &mut PDB) -> Option<Vec<PDBError>> {
#[derive(Eq, PartialEq)]
enum Mode {
Required,
Optional,
}
use Mode::{Optional, Required};
macro_rules! define_columns {
($($i:expr, $name:ident, $label:expr, $req:expr);+;) => {
$(const $name: (usize, &str, Mode) = ($i, $label, $req);)+
const COLUMNS: &[(Mode, &str)] = &[
$(($req, $name.1)),+
];
};
}
define_columns!(
0, ATOM_ALT_ID, "atom_site.label_alt_id", Optional;
1, ATOM_ANISOU_1_1, "_atom_site.aniso_U[1][1]", Optional;
2, ATOM_ANISOU_1_2, "_atom_site.aniso_U[1][2]", Optional;
3, ATOM_ANISOU_1_3, "_atom_site.aniso_U[1][3]", Optional;
4, ATOM_ANISOU_2_1, "_atom_site.aniso_U[2][1]", Optional;
5, ATOM_ANISOU_2_2, "_atom_site.aniso_U[2][2]", Optional;
6, ATOM_ANISOU_2_3, "_atom_site.aniso_U[2][3]", Optional;
7, ATOM_ANISOU_3_1, "_atom_site.aniso_U[3][1]", Optional;
8, ATOM_ANISOU_3_2, "_atom_site.aniso_U[3][2]", Optional;
9, ATOM_ANISOU_3_3, "_atom_site.aniso_U[3][3]", Optional;
10, ATOM_ASYM_ID, "atom_site.label_asym_id", Required;
11, ATOM_AUTH_ASYM_ID, "atom_site.auth_asym_id", Optional;
12, ATOM_B, "atom_site.B_iso_or_equiv", Optional;
13, ATOM_CHARGE, "atom_site.pdbx_formal_charge", Optional;
14, ATOM_COMP_ID, "atom_site.label_comp_id", Required;
15, ATOM_GROUP, "atom_site.group_PDB", Optional;
16, ATOM_ID, "atom_site.id", Required;
17, ATOM_INSERTION, "atom_site.pdbx_PDB_ins_code", Optional;
18, ATOM_MODEL, "atom_site.pdbx_PDB_model_num", Optional;
19, ATOM_NAME, "atom_site.label_atom_id", Required;
20, ATOM_OCCUPANCY, "atom_site.occupancy", Optional;
21, ATOM_SEQ_ID, "atom_site.label_seq_id", Required;
22, ATOM_AUTH_SEQ_ID, "atom_site.auth_seq_id", Optional;
23, ATOM_TYPE, "atom_site.type_symbol", Required;
24, ATOM_X, "atom_site.Cartn_x", Required;
25, ATOM_Y, "atom_site.Cartn_y", Required;
26, ATOM_Z, "atom_site.Cartn_z", Required;
);
let positions_: Vec<Result<Option<usize>, PDBError>> = COLUMNS
.iter()
.map(|tag| (input.header.iter().position(|t| t == tag.1), tag))
.map(|(pos, tag)| match pos {
Some(p) => Ok(Some(p)),
None if tag.0 == Required => Err(PDBError::new(
ErrorLevel::InvalidatingError,
"Missing column in coordinate atoms data loop",
"The above column is missing",
Context::show(tag.1),
)),
None => Ok(None),
})
.collect();
let mut errors = positions_
.iter()
.filter_map(|i| i.clone().err())
.collect::<Vec<_>>();
if !errors.is_empty() {
return Some(errors);
}
#[allow(clippy::unwrap_used)]
let positions: Vec<Option<usize>> = positions_.iter().map(|i| *i.as_ref().unwrap()).collect();
for (index, row) in input.data.iter().enumerate() {
let values: Vec<Option<&Value>> = positions.iter().map(|i| i.map(|x| &row[x])).collect();
let context = Context::show(&format!("Main atomic data loop row: {}", index));
macro_rules! parse_column {
($type:tt, $index:tt) => {
if let Some(value) = values[$index.0] {
match $type(value, &context) {
Ok(t) => t,
Err(e) => {
errors.push(e);
None
}
}
} else {
None
}
};
}
let atom_type = parse_column!(get_text, ATOM_GROUP).unwrap_or("ATOM");
let name = parse_column!(get_text, ATOM_NAME).expect("Atom name should be provided");
let serial_number =
parse_column!(get_usize, ATOM_ID).expect("Atom serial number should be provided");
let element = parse_column!(get_text, ATOM_TYPE).expect("Atom element should be provided");
let residue_name =
parse_column!(get_text, ATOM_COMP_ID).expect("Residue name should be provided");
#[allow(clippy::cast_possible_wrap)]
let residue_number = parse_column!(get_isize, ATOM_AUTH_SEQ_ID).unwrap_or_else(|| {
parse_column!(get_isize, ATOM_SEQ_ID)
.unwrap_or_else(|| pdb.total_residue_count() as isize)
});
let chain_name = parse_column!(get_text, ATOM_AUTH_ASYM_ID).unwrap_or_else(|| {
parse_column!(get_text, ATOM_ASYM_ID).expect("Chain name should be provided")
});
let pos_x = parse_column!(get_f64, ATOM_X).expect("Atom X position should be provided");
let pos_y = parse_column!(get_f64, ATOM_Y).expect("Atom Y position should be provided");
let pos_z = parse_column!(get_f64, ATOM_Z).expect("Atom Z position should be provided");
let occupancy = parse_column!(get_f64, ATOM_OCCUPANCY).unwrap_or(1.0);
let b_factor = parse_column!(get_f64, ATOM_B).unwrap_or(1.0);
let charge = parse_column!(get_isize, ATOM_CHARGE).unwrap_or(0);
let model_number = parse_column!(get_usize, ATOM_MODEL).unwrap_or(1);
let alt_loc = parse_column!(get_text, ATOM_ALT_ID);
let insertion_code = parse_column!(get_text, ATOM_INSERTION);
let aniso_temp = [
[
parse_column!(get_f64, ATOM_ANISOU_1_1),
parse_column!(get_f64, ATOM_ANISOU_1_2),
parse_column!(get_f64, ATOM_ANISOU_1_3),
],
[
parse_column!(get_f64, ATOM_ANISOU_2_1),
parse_column!(get_f64, ATOM_ANISOU_2_2),
parse_column!(get_f64, ATOM_ANISOU_2_3),
],
[
parse_column!(get_f64, ATOM_ANISOU_3_1),
parse_column!(get_f64, ATOM_ANISOU_3_2),
parse_column!(get_f64, ATOM_ANISOU_3_3),
],
];
let aniso = if aniso_temp
.iter()
.flat_map(|l| l.iter())
.all(Option::is_some)
{
#[allow(clippy::unwrap_used)]
Some([
[
aniso_temp[0][0].unwrap(),
aniso_temp[0][1].unwrap(),
aniso_temp[0][2].unwrap(),
],
[
aniso_temp[1][0].unwrap(),
aniso_temp[1][1].unwrap(),
aniso_temp[1][2].unwrap(),
],
[
aniso_temp[2][0].unwrap(),
aniso_temp[2][1].unwrap(),
aniso_temp[2][2].unwrap(),
],
])
} else if aniso_temp
.iter()
.flat_map(|l| l.iter())
.any(Option::is_some)
{
errors.push(PDBError::new(
ErrorLevel::StrictWarning,
"Atom aniso U definition incomplete",
"For a valid anisotropic temperature factor definition all columns (1,1 up to and including 3,3) have to be defined.",
context.clone(),
));
None
} else {
None
};
let model = unsafe {
let pdb_pointer: *mut PDB = pdb;
if let Some(m) = (*pdb_pointer)
.models_mut()
.find(|m| m.serial_number() == model_number)
{
m
} else {
(*pdb_pointer).add_model(Model::new(model_number));
#[allow(clippy::unwrap_used)]
(*pdb_pointer).models_mut().rev().next().unwrap()
}
};
let mut hetero = false;
if atom_type == "ATOM" {
hetero = false;
} else if atom_type == "HETATM" {
hetero = true;
} else {
errors.push(PDBError::new(
ErrorLevel::InvalidatingError,
"Atom type not correct",
"The atom type should be ATOM or HETATM",
context.clone(),
))
}
if let Some(mut atom) = Atom::new(
hetero,
serial_number,
name,
pos_x,
pos_y,
pos_z,
occupancy,
b_factor,
element,
charge,
) {
if let Some(matrix) = aniso {
atom.set_anisotropic_temperature_factors(matrix);
}
model.add_atom(
atom,
chain_name,
(residue_number, insertion_code),
(residue_name, alt_loc),
);
} else {
errors.push(PDBError::new(
ErrorLevel::InvalidatingError,
"Atom definition incorrect",
"The atom name and element should only contain valid characters.",
context.clone(),
))
}
}
if !errors.is_empty() {
Some(errors)
} else {
None
}
}
fn get_text<'a, 'b>(value: &'a Value, context: &'b Context) -> Result<Option<&'a str>, PDBError> {
match value {
Value::Text(t) => Ok(Some(t)),
Value::Inapplicable => Ok(None),
Value::Unknown => Ok(None),
_ => Err(PDBError::new(
ErrorLevel::InvalidatingError,
"Not text",
"",
context.clone(),
)),
}
}
fn get_f64(value: &Value, context: &Context) -> Result<Option<f64>, PDBError> {
match value {
Value::Numeric(num) => Ok(Some(*num)),
Value::Inapplicable => Ok(None),
Value::Unknown => Ok(None),
_ => Err(PDBError::new(
ErrorLevel::InvalidatingError,
"Not a number",
"",
context.clone(),
)),
}
}
fn get_usize(value: &Value, context: &Context) -> Result<Option<usize>, PDBError> {
flatten_result(get_f64(value, context).map(|result| {
if let Some(num) = result {
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::float_cmp
)]
if (0.0..std::usize::MAX as f64).contains(&num) && num.trunc() == num {
Ok(Some(num as usize))
} else {
Err(PDBError::new(
ErrorLevel::InvalidatingError,
"Not an unsigned integer",
"",
context.clone(),
))
}
} else {
Ok(None)
}
}))
}
fn get_isize(value: &Value, context: &Context) -> Result<Option<isize>, PDBError> {
flatten_result(get_f64(value, context).map(|result| {
if let Some(num) = result {
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::float_cmp
)]
if (std::isize::MIN as f64..std::isize::MAX as f64).contains(&num) && num.trunc() == num
{
Ok(Some(num as isize))
} else {
Err(PDBError::new(
ErrorLevel::InvalidatingError,
"Not an integer",
"",
context.clone(),
))
}
} else {
Ok(None)
}
}))
}