use chematic_core::{AtomIdx, BondOrder, Element, Molecule};
use chematic_perception::ring_sizes_for_atom;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NumericTypeError(pub String);
impl std::fmt::Display for NumericTypeError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "MMFF94 numeric type error: {}", self.0)
}
}
static MMFF94_PBCI: &[(u8, f64, f64)] = &[
(1, 0.000, 0.000), (2, -0.135, 0.000), (3, -0.095, 0.000), (4, -0.200, 0.000), (5, -0.023, 0.000), (6, -0.243, 0.000), (7, -0.687, 0.000), (8, -0.253, 0.000), (9, -0.306, 0.000), (10, -0.244, 0.000), (11, -0.317, 0.000), (12, -0.304, 0.000), (13, -0.238, 0.000), (14, -0.208, 0.000), (15, -0.236, 0.000), (16, -0.475, 0.000), (17, -0.191, 0.000), (18, -0.118, 0.000), (19, 0.094, 0.000), (20, -0.019, 0.000), (21, 0.157, 0.000), (22, -0.095, 0.000), (23, 0.193, 0.000), (24, 0.257, 0.000), (25, 0.012, 0.000), (26, -0.142, 0.000), (27, 0.094, 0.000), (28, 0.058, 0.000), (29, 0.207, 0.000), (30, -0.166, 0.000), (31, 0.161, 0.000), (32, -0.732, 0.500), (33, 0.257, 0.000), (34, -0.491, 0.000), (35, -0.456, 0.500), (36, -0.031, 0.000), (37, -0.127, 0.000), (38, -0.437, 0.000), (39, -0.104, 0.000), (40, -0.264, 0.000), (41, 0.052, 0.000), (42, -0.757, 0.000), (43, -0.326, 0.000), (44, -0.237, 0.000), (45, -0.260, 0.000), (46, -0.429, 0.000), (47, -0.418, 0.000), (48, -0.525, 0.000), (49, -0.283, 0.000), (50, 0.284, 0.000), (51, -1.046, 0.000), (52, -0.546, 0.000), (53, -0.048, 0.000), (54, -0.424, 0.000), (55, -0.476, 0.000), (56, -0.438, 0.000), (57, -0.105, 0.000), (58, -0.488, 0.000), (59, -0.337, 0.000), (60, -0.635, 0.000), (61, -0.265, 0.000), (62, -0.125, 0.250), (63, -0.180, 0.000), (64, -0.181, 0.000), (65, -0.475, 0.000), (66, -0.467, 0.000), (67, -0.099, 0.000), (68, -0.135, 0.000), (69, -0.099, 0.000), (70, -0.269, 0.000), (71, -0.071, 0.000), (72, -0.580, 0.500), (73, -0.200, 0.000), (74, -0.301, 0.000), (75, -0.255, 0.000), (76, -0.568, 0.250), (77, -0.282, 0.000), (78, -0.168, 0.000), (79, -0.471, 0.000), (80, -0.144, 0.000), (81, -0.514, 0.000), (82, -0.099, 0.000), (83, 0.000, 0.000), (84, 0.000, 0.000), (85, 0.000, 0.000), (86, 0.000, 0.000), (87, 2.000, 0.000), (88, 3.000, 0.000), (89, -1.000, 0.000), (90, -1.000, 0.000), (91, -1.000, 0.000), (92, 1.000, 0.000), (93, 1.000, 0.000), (94, 1.000, 0.000), (95, 2.000, 0.000), (96, 2.000, 0.000), (97, 1.000, 0.000), (98, 2.000, 0.000), (99, 2.000, 0.000), ];
static MMFF94_CHG: &[(u8, u8, u8, f64)] = &[
(0, 1, 1, 0.0),
(0, 1, 2, -0.1382),
(0, 1, 3, -0.061),
(0, 1, 4, -0.2),
(0, 1, 5, 0.0),
(0, 1, 6, -0.28),
(0, 1, 8, -0.27),
(0, 1, 9, -0.246),
(0, 1, 10, -0.3001),
(0, 1, 11, -0.34),
(0, 1, 12, -0.29),
(0, 1, 13, -0.23),
(0, 1, 14, -0.19),
(0, 1, 15, -0.23),
(0, 1, 17, -0.1935),
(0, 1, 18, -0.1052),
(0, 1, 19, 0.0805),
(0, 1, 20, 0.0),
(0, 1, 22, -0.095),
(0, 1, 25, 0.0),
(0, 1, 26, -0.1669),
(0, 1, 34, -0.503),
(0, 1, 35, -0.4274),
(0, 1, 37, -0.1435),
(0, 1, 39, -0.2556),
(0, 1, 40, -0.3691),
(0, 1, 41, 0.106),
(0, 1, 43, -0.3557),
(0, 1, 45, -0.2402),
(0, 1, 46, -0.3332),
(0, 1, 54, -0.3461),
(0, 1, 55, -0.4895),
(0, 1, 56, -0.3276),
(0, 1, 57, -0.105),
(0, 1, 58, -0.488),
(0, 1, 61, -0.2657),
(0, 1, 62, -0.2),
(0, 1, 63, -0.18),
(0, 1, 64, -0.181),
(0, 1, 67, -0.099),
(0, 1, 68, -0.256),
(0, 1, 72, -0.55),
(0, 1, 73, -0.0877),
(0, 1, 75, -0.255),
(0, 1, 78, -0.168),
(0, 1, 80, -0.144),
(0, 1, 81, -0.514),
(0, 2, 2, 0.0),
(1, 2, 2, 0.0),
(1, 2, 3, -0.0144),
(0, 2, 4, -0.065),
(1, 2, 4, -0.065),
(0, 2, 5, 0.15),
(0, 2, 6, -0.0767),
(1, 2, 9, -0.171),
(0, 2, 10, -0.109),
(0, 2, 11, -0.1495),
(0, 2, 12, -0.14),
(0, 2, 13, -0.11),
(0, 2, 14, -0.09),
(0, 2, 15, -0.101),
(0, 2, 17, -0.056),
(0, 2, 18, 0.017),
(0, 2, 19, 0.229),
(0, 2, 20, 0.116),
(0, 2, 22, 0.04),
(0, 2, 25, 0.147),
(0, 2, 30, -0.031),
(0, 2, 34, -0.356),
(0, 2, 35, -0.35),
(1, 2, 37, 0.0284),
(1, 2, 39, 0.031),
(0, 2, 40, -0.1),
(0, 2, 41, 0.25),
(0, 2, 43, -0.191),
(0, 2, 45, -0.2044),
(0, 2, 46, -0.294),
(0, 2, 55, -0.341),
(0, 2, 56, -0.303),
(0, 2, 62, -0.05),
(1, 2, 63, -0.045),
(1, 2, 64, -0.046),
(1, 2, 67, 0.036),
(0, 2, 72, -0.45),
(1, 2, 81, -0.379),
(1, 3, 3, 0.0),
(1, 3, 4, -0.105),
(0, 3, 5, 0.06),
(0, 3, 6, -0.15),
(0, 3, 7, -0.57),
(0, 3, 9, -0.45),
(1, 3, 9, -0.211),
(0, 3, 10, -0.06),
(0, 3, 11, -0.222),
(0, 3, 12, -0.209),
(0, 3, 15, -0.141),
(0, 3, 16, -0.38),
(0, 3, 17, -0.096),
(0, 3, 18, -0.023),
(0, 3, 20, 0.053),
(0, 3, 22, 0.0),
(0, 3, 25, 0.107),
(1, 3, 30, -0.071),
(0, 3, 35, -0.361),
(1, 3, 37, 0.0862),
(1, 3, 39, -0.009),
(0, 3, 40, -0.05),
(0, 3, 41, 0.147),
(0, 3, 43, -0.2363),
(0, 3, 45, -0.165),
(0, 3, 48, -0.43),
(0, 3, 51, -0.95),
(0, 3, 53, -0.0134),
(0, 3, 54, -0.4),
(1, 3, 54, -0.329),
(0, 3, 55, -0.381),
(0, 3, 56, -0.343),
(1, 3, 57, -0.01),
(1, 3, 58, -0.393),
(0, 3, 62, -0.03),
(1, 3, 63, -0.085),
(1, 3, 64, -0.086),
(0, 3, 67, -0.004),
(0, 3, 74, -0.319),
(0, 3, 75, -0.2474),
(1, 3, 78, -0.073),
(1, 3, 80, -0.049),
(0, 4, 5, 0.177),
(0, 4, 6, -0.043),
(0, 4, 7, -0.487),
(0, 4, 9, -0.3),
(1, 4, 9, -0.106),
(0, 4, 10, -0.044),
(0, 4, 15, -0.036),
(0, 4, 20, 0.181),
(0, 4, 22, 0.105),
(0, 4, 30, 0.034),
(1, 4, 37, 0.073),
(0, 4, 40, -0.064),
(0, 4, 42, -0.5571),
(0, 4, 43, -0.126),
(1, 4, 63, 0.02),
(1, 4, 64, 0.019),
(0, 5, 19, 0.2),
(0, 5, 20, 0.0),
(0, 5, 22, -0.1),
(0, 5, 30, -0.15),
(0, 5, 37, -0.15),
(0, 5, 41, 0.2203),
(0, 5, 57, -0.15),
(0, 5, 63, -0.15),
(0, 5, 64, -0.15),
(0, 5, 78, -0.15),
(0, 5, 80, -0.15),
(0, 6, 6, 0.0),
(0, 6, 8, -0.1),
(0, 6, 9, -0.063),
(0, 6, 10, 0.0355),
(0, 6, 15, 0.007),
(0, 6, 17, 0.052),
(0, 6, 18, 0.1837),
(0, 6, 19, 0.2974),
(0, 6, 20, 0.2579),
(0, 6, 21, 0.4),
(0, 6, 22, 0.148),
(0, 6, 24, 0.5),
(0, 6, 25, 0.2712),
(0, 6, 26, 0.101),
(0, 6, 29, 0.45),
(0, 6, 30, 0.077),
(0, 6, 33, 0.5),
(0, 6, 37, 0.0825),
(0, 6, 39, 0.139),
(0, 6, 40, -0.021),
(0, 6, 41, 0.295),
(0, 6, 43, -0.083),
(0, 6, 45, -0.009),
(0, 6, 54, -0.181),
(0, 6, 55, -0.233),
(0, 6, 57, 0.138),
(0, 6, 58, -0.245),
(0, 6, 63, 0.063),
(0, 6, 64, 0.062),
(0, 7, 17, 0.5),
(0, 7, 46, 0.1618),
(0, 7, 74, 0.5),
(0, 8, 8, 0.0),
(0, 8, 9, -0.053),
(0, 8, 10, 0.009),
(0, 8, 12, -0.051),
(0, 8, 15, 0.017),
(0, 8, 17, 0.062),
(0, 8, 19, 0.347),
(0, 8, 20, 0.2096),
(0, 8, 22, 0.158),
(0, 8, 23, 0.36),
(0, 8, 25, 0.2679),
(0, 8, 26, 0.111),
(0, 8, 34, -0.238),
(0, 8, 39, 0.149),
(0, 8, 40, -0.011),
(0, 8, 43, -0.073),
(0, 8, 45, -0.007),
(0, 8, 46, -0.176),
(0, 8, 55, -0.223),
(0, 8, 56, -0.185),
(0, 9, 9, 0.0),
(0, 9, 10, 0.062),
(0, 9, 12, 0.002),
(0, 9, 15, 0.07),
(0, 9, 18, 0.188),
(0, 9, 19, 0.4),
(0, 9, 20, 0.287),
(0, 9, 25, 0.318),
(0, 9, 27, 0.4),
(0, 9, 34, -0.185),
(0, 9, 35, -0.15),
(1, 9, 37, 0.179),
(1, 9, 39, 0.202),
(0, 9, 40, 0.042),
(0, 9, 41, 0.358),
(0, 9, 45, 0.046),
(0, 9, 53, 0.3179),
(0, 9, 54, -0.118),
(0, 9, 55, -0.17),
(0, 9, 56, -0.132),
(1, 9, 57, 0.201),
(0, 9, 62, 0.181),
(1, 9, 63, 0.126),
(1, 9, 64, 0.125),
(0, 9, 67, 0.207),
(1, 9, 78, 0.138),
(1, 9, 81, -0.208),
(0, 10, 10, 0.0),
(0, 10, 13, 0.006),
(0, 10, 14, 0.036),
(0, 10, 15, 0.008),
(0, 10, 17, 0.053),
(0, 10, 20, 0.225),
(0, 10, 22, 0.149),
(0, 10, 25, 0.256),
(0, 10, 26, 0.102),
(0, 10, 28, 0.37),
(0, 10, 34, -0.247),
(0, 10, 35, -0.212),
(0, 10, 37, 0.117),
(0, 10, 39, 0.14),
(0, 10, 40, -0.02),
(0, 10, 41, 0.296),
(0, 10, 45, -0.016),
(0, 10, 63, 0.064),
(0, 10, 64, 0.063),
(0, 11, 20, 0.298),
(0, 11, 22, 0.2317),
(0, 11, 25, 0.329),
(0, 11, 26, 0.175),
(0, 11, 37, 0.19),
(0, 11, 40, 0.053),
(0, 12, 15, 0.068),
(0, 12, 18, 0.186),
(0, 12, 19, 0.3701),
(0, 12, 20, 0.29),
(0, 12, 22, 0.2273),
(0, 12, 25, 0.316),
(0, 12, 26, 0.2112),
(0, 12, 37, 0.177),
(0, 12, 40, 0.04),
(0, 12, 57, 0.199),
(0, 12, 63, 0.124),
(0, 12, 64, 0.123),
(0, 13, 20, 0.219),
(0, 13, 22, 0.143),
(0, 13, 37, 0.111),
(0, 13, 64, 0.057),
(0, 14, 20, 0.189),
(0, 14, 37, 0.081),
(0, 15, 15, 0.0),
(0, 15, 18, 0.118),
(0, 15, 19, 0.33),
(0, 15, 20, 0.217),
(0, 15, 22, 0.141),
(0, 15, 25, 0.248),
(0, 15, 26, 0.094),
(0, 15, 30, 0.07),
(0, 15, 37, 0.1015),
(0, 15, 40, -0.028),
(0, 15, 43, -0.09),
(0, 15, 57, 0.131),
(0, 15, 63, 0.056),
(0, 15, 64, 0.055),
(0, 15, 71, 0.18),
(0, 16, 16, 0.0),
(0, 17, 17, 0.0),
(0, 17, 20, 0.172),
(0, 17, 22, 0.096),
(0, 17, 37, 0.064),
(0, 17, 43, -0.135),
(0, 18, 18, 0.0),
(0, 18, 20, 0.099),
(0, 18, 22, 0.023),
(0, 18, 32, -0.65),
(0, 18, 37, -0.009),
(0, 18, 39, 0.014),
(0, 18, 43, -0.138),
(0, 18, 48, -0.5895),
(0, 18, 55, -0.358),
(0, 18, 58, -0.37),
(0, 18, 62, 0.2099),
(0, 18, 63, -0.062),
(0, 18, 64, -0.063),
(0, 18, 80, -0.026),
(0, 19, 19, 0.0),
(0, 19, 20, -0.113),
(0, 19, 37, -0.221),
(0, 19, 40, -0.358),
(0, 19, 63, -0.274),
(0, 19, 75, -0.349),
(0, 20, 20, 0.0),
(0, 20, 22, -0.076),
(0, 20, 25, 0.031),
(0, 20, 26, -0.123),
(0, 20, 30, -0.138),
(0, 20, 34, -0.472),
(0, 20, 37, -0.108),
(0, 20, 40, -0.245),
(0, 20, 41, 0.071),
(0, 20, 43, -0.307),
(0, 20, 45, -0.241),
(0, 22, 22, 0.0),
(0, 22, 30, -0.071),
(0, 22, 34, -0.396),
(0, 22, 37, -0.032),
(0, 22, 40, -0.169),
(0, 22, 41, 0.147),
(0, 22, 43, -0.231),
(0, 22, 45, -0.165),
(0, 23, 39, -0.27),
(0, 23, 62, -0.4),
(0, 23, 67, -0.292),
(0, 23, 68, -0.36),
(0, 25, 25, 0.0),
(0, 25, 32, -0.7),
(0, 25, 37, -0.139),
(0, 25, 39, -0.116),
(0, 25, 40, -0.276),
(0, 25, 43, -0.338),
(0, 25, 57, -0.117),
(0, 25, 63, -0.192),
(0, 25, 71, -0.0362),
(0, 25, 72, -0.6773),
(0, 26, 26, 0.0),
(0, 26, 34, -0.349),
(0, 26, 37, 0.015),
(0, 26, 40, -0.122),
(0, 26, 71, 0.096),
(0, 28, 40, -0.4),
(0, 28, 43, -0.42),
(0, 28, 48, -0.4),
(0, 30, 30, 0.0),
(0, 30, 40, -0.098),
(1, 30, 67, 0.067),
(0, 31, 70, -0.43),
(0, 32, 41, 0.65),
(0, 32, 45, 0.52),
(0, 32, 67, 0.633),
(0, 32, 68, 0.75),
(0, 32, 69, 0.75),
(0, 32, 73, 0.35),
(0, 32, 77, 0.45),
(0, 32, 82, 0.633),
(0, 34, 36, 0.45),
(0, 34, 37, 0.364),
(0, 34, 43, 0.165),
(0, 35, 37, 0.329),
(0, 35, 63, 0.276),
(0, 36, 54, -0.4),
(0, 36, 55, -0.45),
(0, 36, 56, -0.45),
(0, 36, 58, -0.457),
(4, 36, 58, -0.45),
(0, 36, 81, -0.45),
(0, 37, 37, 0.0),
(1, 37, 37, 0.0),
(0, 37, 38, -0.31),
(0, 37, 39, 0.023),
(1, 37, 39, 0.023),
(0, 37, 40, -0.1),
(0, 37, 41, 0.179),
(0, 37, 43, -0.199),
(0, 37, 45, -0.133),
(0, 37, 46, -0.302),
(0, 37, 55, -0.349),
(0, 37, 56, -0.311),
(1, 37, 57, 0.022),
(0, 37, 58, -0.361),
(1, 37, 58, -0.361),
(4, 37, 58, -0.35),
(0, 37, 61, -0.138),
(0, 37, 62, 0.002),
(0, 37, 63, 0.0),
(1, 37, 63, -0.053),
(0, 37, 64, 0.0),
(1, 37, 64, -0.054),
(1, 37, 67, 0.028),
(0, 37, 69, -0.0895),
(0, 37, 78, -0.041),
(0, 37, 81, -0.387),
(1, 37, 81, -0.387),
(0, 38, 38, 0.0),
(0, 38, 63, 0.257),
(0, 38, 64, 0.256),
(0, 38, 69, 0.338),
(0, 38, 78, 0.269),
(1, 39, 39, 0.0),
(0, 39, 40, -0.16),
(0, 39, 45, -0.156),
(0, 39, 63, -0.1516),
(1, 39, 63, -0.076),
(0, 39, 64, -0.077),
(1, 39, 64, -0.077),
(0, 39, 65, -0.418),
(0, 39, 78, -0.064),
(0, 40, 40, 0.0),
(0, 40, 45, 0.004),
(0, 40, 46, -0.165),
(0, 40, 54, -0.16),
(0, 40, 63, 0.084),
(0, 40, 64, 0.083),
(0, 40, 78, 0.096),
(0, 41, 41, 0.0),
(0, 41, 55, -0.528),
(0, 41, 62, -0.177),
(0, 41, 72, -0.5),
(0, 41, 80, -0.196),
(0, 42, 61, 0.492),
(0, 43, 43, 0.0),
(0, 43, 45, 0.066),
(0, 43, 64, 0.145),
(0, 44, 63, 0.04),
(0, 44, 65, -0.2207),
(0, 44, 78, 0.069),
(0, 44, 80, 0.093),
(0, 45, 63, 0.08),
(0, 45, 64, 0.079),
(0, 45, 78, 0.092),
(0, 47, 53, 0.37),
(0, 49, 50, 0.5673),
(0, 51, 52, 0.5),
(0, 55, 57, 0.3544),
(0, 55, 62, 0.351),
(0, 55, 64, 0.295),
(0, 55, 80, 0.332),
(0, 56, 57, 0.4),
(0, 56, 63, 0.258),
(0, 56, 80, 0.27),
(4, 57, 58, -0.4),
(1, 57, 63, -0.075),
(1, 57, 64, -0.076),
(0, 58, 63, 0.308),
(0, 58, 64, 0.307),
(0, 59, 63, 0.14),
(0, 59, 65, -0.1209),
(0, 59, 78, 0.169),
(0, 59, 80, 0.193),
(0, 59, 82, 0.238),
(0, 60, 61, 0.37),
(0, 62, 63, -0.055),
(0, 62, 64, -0.056),
(0, 63, 63, 0.0),
(1, 63, 63, 0.0),
(0, 63, 64, 0.0),
(0, 63, 66, -0.3381),
(0, 63, 72, -0.4),
(0, 63, 78, 0.012),
(0, 63, 81, -0.334),
(0, 64, 64, 0.0),
(0, 64, 65, -0.2888),
(0, 64, 66, -0.2272),
(0, 64, 78, 0.013),
(0, 64, 81, -0.333),
(0, 64, 82, 0.082),
(0, 65, 66, 0.0),
(0, 65, 78, 0.307),
(0, 65, 81, -0.039),
(0, 65, 82, 0.376),
(0, 66, 66, 0.0),
(0, 66, 78, 0.299),
(0, 66, 81, -0.047),
(0, 71, 75, -0.0958),
(0, 72, 73, 0.45),
(0, 76, 76, 0.0),
(0, 76, 78, 0.4),
(0, 78, 78, 0.0),
(1, 78, 78, 0.0),
(0, 78, 79, -0.303),
(0, 78, 81, -0.35),
(0, 79, 81, -0.043),
(0, 80, 81, -0.4),
];
pub fn pbci_for(atom_type: u8) -> (f64, f64) {
for &(t, pbci, fcadj) in MMFF94_PBCI {
if t == atom_type {
return (pbci, fcadj);
}
}
(0.0, 0.0)
}
fn lookup_chg_contribution(bond_type: u8, type_i: u8, type_j: u8) -> Option<f64> {
for &(bt, a, b, bci) in MMFF94_CHG {
if bt == bond_type && a == type_j && b == type_i {
return Some(bci); }
}
for &(bt, a, b, bci) in MMFF94_CHG {
if bt == bond_type && a == type_i && b == type_j {
return Some(-bci); }
}
None
}
fn bond_type_for(order: BondOrder) -> u8 {
match order {
BondOrder::Single | BondOrder::Up | BondOrder::Down => 0,
BondOrder::Double => 1,
BondOrder::Triple => 2,
BondOrder::Aromatic => 4,
_ => 0,
}
}
pub fn assign_mmff94_numeric_types(mol: &Molecule) -> Result<Vec<u8>, NumericTypeError> {
let n = mol.atom_count();
let mut types = vec![0u8; n];
for i in 0..n {
let idx = AtomIdx(i as u32);
let atom = mol.atom(idx);
let t = match atom.element {
Element::C => assign_c_type(mol, idx)?,
Element::N => assign_n_type(mol, idx)?,
Element::O => assign_o_type(mol, idx)?,
Element::S => assign_s_type(mol, idx)?,
Element::P => assign_p_type(mol, idx)?,
Element::SI => 19,
Element::F => 11,
Element::CL => 12,
Element::BR => 13,
Element::I => 14,
Element::H => assign_h_type(mol, idx)?,
_ => return Err(NumericTypeError(format!(
"unsupported element {:?} at atom {i}", atom.element
))),
};
types[i] = t;
}
Ok(types)
}
struct BondInfo {
neighbor: AtomIdx,
order: BondOrder,
}
fn bonds_of(mol: &Molecule, idx: AtomIdx) -> Vec<BondInfo> {
mol.bonds()
.filter_map(|(_, b)| {
if b.atom1 == idx {
Some(BondInfo { neighbor: b.atom2, order: b.order })
} else if b.atom2 == idx {
Some(BondInfo { neighbor: b.atom1, order: b.order })
} else {
None
}
})
.collect()
}
fn count_bond_order(mol: &Molecule, idx: AtomIdx, order: BondOrder) -> usize {
bonds_of(mol, idx).iter().filter(|b| b.order == order).count()
}
fn neighbor_elements(mol: &Molecule, idx: AtomIdx) -> Vec<Element> {
bonds_of(mol, idx).iter().map(|b| mol.atom(b.neighbor).element).collect()
}
fn is_bonded_to(mol: &Molecule, idx: AtomIdx, elem: Element, order: BondOrder) -> bool {
bonds_of(mol, idx).iter().any(|b| mol.atom(b.neighbor).element == elem && b.order == order)
}
fn is_neighbor(mol: &Molecule, idx: AtomIdx, elem: Element) -> bool {
neighbor_elements(mol, idx).contains(&elem)
}
fn is_in_aromatic_ring_of_size(mol: &Molecule, idx: AtomIdx, sz: usize) -> bool {
ring_sizes_for_atom(mol, idx.0 as usize)
.into_iter()
.any(|s| s == sz && mol.atom(idx).aromatic)
}
fn assign_c_type(mol: &Molecule, idx: AtomIdx) -> Result<u8, NumericTypeError> {
let atom = mol.atom(idx);
if atom.aromatic {
return Ok(aromatic_c_type(mol, idx));
}
let double_bonds = count_bond_order(mol, idx, BondOrder::Double);
let triple_bonds = count_bond_order(mol, idx, BondOrder::Triple);
if triple_bonds > 0 {
return Ok(4); }
if double_bonds > 0 {
if is_bonded_to(mol, idx, Element::O, BondOrder::Double)
|| is_bonded_to(mol, idx, Element::S, BondOrder::Double)
{
return Ok(3); }
return Ok(2); }
Ok(1) }
fn aromatic_c_type(mol: &Molecule, idx: AtomIdx) -> u8 {
let ring_sizes = ring_sizes_for_atom(mol, idx.0 as usize);
let in_6 = ring_sizes.iter().any(|&s| s == 6);
let in_5 = ring_sizes.iter().any(|&s| s == 5);
if in_6 && !in_5 {
return 63; }
if in_5 {
let has_hetero_neighbor = neighbor_elements(mol, idx)
.into_iter()
.any(|e| matches!(e, Element::N | Element::O | Element::S));
if has_hetero_neighbor {
return 37; }
return 38; }
64 }
fn assign_n_type(mol: &Molecule, idx: AtomIdx) -> Result<u8, NumericTypeError> {
let atom = mol.atom(idx);
if atom.aromatic {
return Ok(aromatic_n_type(mol, idx));
}
let double_bonds = count_bond_order(mol, idx, BondOrder::Double);
let triple_bonds = count_bond_order(mol, idx, BondOrder::Triple);
let nbrs = bonds_of(mol, idx);
if atom.charge > 0 {
return Ok(32); }
if triple_bonds > 0 {
return Ok(9); }
if double_bonds > 0 {
return Ok(9); }
let is_amide = nbrs.iter().any(|b| {
let nbr = mol.atom(b.neighbor);
nbr.element == Element::C && {
bonds_of(mol, b.neighbor)
.iter()
.any(|bb| bb.order == BondOrder::Double && mol.atom(bb.neighbor).element == Element::O)
}
});
if is_amide {
return Ok(10); }
let double_o = bonds_of(mol, idx)
.iter()
.filter(|b| b.order == BondOrder::Double && mol.atom(b.neighbor).element == Element::O)
.count();
if double_o >= 2 {
return Ok(46); }
Ok(8) }
fn aromatic_n_type(mol: &Molecule, idx: AtomIdx) -> u8 {
let ring_sizes = ring_sizes_for_atom(mol, idx.0 as usize);
let in_5 = ring_sizes.iter().any(|&s| s == 5);
let has_h = is_neighbor(mol, idx, Element::H);
if in_5 {
if has_h {
return 40; }
return 58; }
67 }
fn assign_o_type(mol: &Molecule, idx: AtomIdx) -> Result<u8, NumericTypeError> {
if count_bond_order(mol, idx, BondOrder::Double) > 0 {
return Ok(7); }
if mol.atom(idx).charge < 0 {
return Ok(34); }
Ok(6) }
fn assign_s_type(mol: &Molecule, idx: AtomIdx) -> Result<u8, NumericTypeError> {
let atom = mol.atom(idx);
if atom.aromatic {
return Ok(44); }
let double_o = bonds_of(mol, idx)
.iter()
.filter(|b| b.order == BondOrder::Double && mol.atom(b.neighbor).element == Element::O)
.count();
match double_o {
2.. => Ok(18), 1 => Ok(17), 0 => {
if count_bond_order(mol, idx, BondOrder::Double) > 0 {
return Ok(16); }
Ok(15) }
}
}
fn assign_p_type(mol: &Molecule, idx: AtomIdx) -> Result<u8, NumericTypeError> {
if is_bonded_to(mol, idx, Element::O, BondOrder::Double) {
return Ok(25); }
Ok(20) }
fn assign_h_type(mol: &Molecule, idx: AtomIdx) -> Result<u8, NumericTypeError> {
let nbrs = bonds_of(mol, idx);
if nbrs.is_empty() {
return Ok(5); }
let nbr_atom = mol.atom(nbrs[0].neighbor);
Ok(match nbr_atom.element {
Element::C => 5, Element::O => 24, Element::S => 71, Element::N => {
let n_idx = nbrs[0].neighbor;
let n_atom = mol.atom(n_idx);
if n_atom.aromatic {
return Ok(23); }
let n_is_amide = bonds_of(mol, n_idx).iter().any(|b| {
b.order == BondOrder::Single && mol.atom(b.neighbor).element == Element::C
&& bonds_of(mol, b.neighbor).iter().any(|bb| {
bb.order == BondOrder::Double
&& mol.atom(bb.neighbor).element == Element::O
})
});
if n_is_amide {
28 } else if count_bond_order(mol, n_idx, BondOrder::Double) > 0 {
27 } else {
23 }
}
_ => 5,
})
}
pub fn mmff94_charges_numeric(
mol: &Molecule,
) -> Result<Vec<f64>, NumericTypeError> {
let types = assign_mmff94_numeric_types(mol)?;
let n = mol.atom_count();
let mut charges = vec![0.0f64; n];
for i in 0..n {
let idx = AtomIdx(i as u32);
let atom = mol.atom(idx);
let (_, fcadj) = pbci_for(types[i]);
let q0 = atom.charge as f64;
let m = bonds_of(mol, idx).len() as f64;
charges[i] = (1.0 - m * fcadj) * q0;
}
for (_, bond) in mol.bonds() {
let i = bond.atom1.0 as usize;
let j = bond.atom2.0 as usize;
let ti = types[i];
let tj = types[j];
let bt = bond_type_for(bond.order);
let ci = lookup_chg_contribution(bt, ti, tj)
.unwrap_or_else(|| pbci_for(ti).0 - pbci_for(tj).0);
let cj = lookup_chg_contribution(bt, tj, ti)
.unwrap_or_else(|| pbci_for(tj).0 - pbci_for(ti).0);
charges[i] += ci;
charges[j] += cj;
}
for i in 0..n {
let idx = AtomIdx(i as u32);
let (_, fcadj_i) = pbci_for(types[i]);
let m = bonds_of(mol, idx).len() as f64;
if fcadj_i > 0.0 {
let sum_fc: f64 = bonds_of(mol, idx)
.iter()
.map(|b| mol.atom(b.neighbor).charge as f64)
.sum();
charges[i] += fcadj_i * sum_fc;
}
for b in bonds_of(mol, idx) {
let nbr = mol.atom(b.neighbor);
if nbr.charge < 0 {
let deg = bonds_of(mol, b.neighbor).len() as f64;
charges[i] += (nbr.charge as f64) / (2.0 * deg);
}
}
}
Ok(charges)
}
#[cfg(test)]
mod tests {
use super::*;
use chematic_smiles::parse;
fn mol(s: &str) -> Molecule {
parse(s).unwrap()
}
#[test]
fn glycine_types_match_mmff94_reference() {
let m = mol("NCC(=O)O");
let types = assign_mmff94_numeric_types(&m).unwrap();
let mut n_types: Vec<u8> = Vec::new();
let mut c_types: Vec<u8> = Vec::new();
let mut o_types: Vec<u8> = Vec::new();
for i in 0..m.atom_count() {
let a = m.atom(AtomIdx(i as u32));
match a.element {
Element::N => n_types.push(types[i]),
Element::C => c_types.push(types[i]),
Element::O => o_types.push(types[i]),
_ => {}
}
}
assert!(n_types.iter().all(|&t| t == 8), "N should be type 8 (NR), got {:?}", n_types);
assert!(c_types.contains(&1), "should have sp3 C (type 1), got {:?}", c_types);
assert!(c_types.contains(&3), "should have carbonyl C (type 3), got {:?}", c_types);
assert!(o_types.contains(&6), "should have OR oxygen (type 6), got {:?}", o_types);
assert!(o_types.contains(&7), "should have O=C oxygen (type 7), got {:?}", o_types);
}
#[test]
fn benzene_aromatic_c_is_type_63() {
let m = mol("c1ccccc1");
let types = assign_mmff94_numeric_types(&m).unwrap();
for i in 0..m.atom_count() {
let a = m.atom(AtomIdx(i as u32));
if a.element == Element::C {
assert_eq!(types[i], 63, "benzene C should be type 63 (CB)");
}
}
}
#[test]
fn pyridine_n_is_type_67() {
let m = mol("c1ccncc1");
let types = assign_mmff94_numeric_types(&m).unwrap();
for i in 0..m.atom_count() {
let a = m.atom(AtomIdx(i as u32));
if a.element == Element::N {
assert_eq!(types[i], 67, "pyridine N should be type 67 (N6A)");
}
}
}
#[test]
fn halogens_map_correctly() {
let m = mol("CF");
let types = assign_mmff94_numeric_types(&m).unwrap();
for i in 0..m.atom_count() {
let a = m.atom(AtomIdx(i as u32));
match a.element {
Element::F => assert_eq!(types[i], 11),
Element::C => assert_eq!(types[i], 1),
_ => {}
}
}
let m2 = mol("CCl");
let types2 = assign_mmff94_numeric_types(&m2).unwrap();
for i in 0..m2.atom_count() {
if m2.atom(AtomIdx(i as u32)).element == Element::CL {
assert_eq!(types2[i], 12);
}
}
}
#[test]
fn amide_n_is_type_10() {
let m = mol("NC(=O)C");
let types = assign_mmff94_numeric_types(&m).unwrap();
for i in 0..m.atom_count() {
let a = m.atom(AtomIdx(i as u32));
if a.element == Element::N {
assert_eq!(types[i], 10, "amide N should be type 10 (NC=O)");
}
}
}
#[test]
fn sulfoxide_is_type_17_sulfone_is_type_18() {
let m_so = mol("CS(=O)C"); let types_so = assign_mmff94_numeric_types(&m_so).unwrap();
let m_s2 = mol("CS(=O)(=O)C"); let types_s2 = assign_mmff94_numeric_types(&m_s2).unwrap();
for i in 0..m_so.atom_count() {
if m_so.atom(AtomIdx(i as u32)).element == Element::S {
assert_eq!(types_so[i], 17, "DMSO S should be type 17 (SO)");
}
}
for i in 0..m_s2.atom_count() {
if m_s2.atom(AtomIdx(i as u32)).element == Element::S {
assert_eq!(types_s2[i], 18, "DMSO2 S should be type 18 (SO2)");
}
}
}
#[test]
fn charge_sum_equals_formal_charge_methane() {
let m = mol("C");
let q = mmff94_charges_numeric(&m).unwrap();
let total: f64 = q.iter().sum();
assert!(total.abs() < 0.1, "methane net charge = {total:.4}");
}
#[test]
fn charge_sum_equals_formal_charge_glycine() {
let m = mol("NCC(=O)O");
let q = mmff94_charges_numeric(&m).unwrap();
let total: f64 = q.iter().sum();
assert!(total.abs() < 0.15, "glycine net charge = {total:.4}");
}
#[test]
fn carbonyl_oxygen_is_most_negative() {
let m = mol("CC(=O)C");
let types = assign_mmff94_numeric_types(&m).unwrap();
let q = mmff94_charges_numeric(&m).unwrap();
let (o_idx, _) = m.atoms()
.find(|(_, a)| a.element == Element::O)
.unwrap();
let o_charge = q[o_idx.0 as usize];
assert!(o_charge < -0.3, "ketone O charge = {o_charge:.3}, expected < -0.3");
assert_eq!(types[o_idx.0 as usize], 7, "ketone O should be type 7");
}
#[test]
fn amine_n_is_negative() {
let m = mol("CCN");
let q = mmff94_charges_numeric(&m).unwrap();
let n_charge = m.atoms()
.find(|(_, a)| a.element == Element::N)
.map(|(i, _)| q[i.0 as usize])
.unwrap();
assert!(n_charge < -0.1, "amine N charge = {n_charge:.3}, expected negative");
}
#[test]
fn h_on_nitrogen_is_positive() {
let m = mol("C[NH2]");
let q = mmff94_charges_numeric(&m).unwrap();
let types = assign_mmff94_numeric_types(&m).unwrap();
let h_charges: Vec<f64> = m.atoms()
.filter(|(i, a)| a.element == Element::H && types[i.0 as usize] == 23)
.map(|(i, _)| q[i.0 as usize])
.collect();
if h_charges.is_empty() {
let n_charge = m.atoms()
.find(|(_, a)| a.element == Element::N)
.map(|(i, _)| q[i.0 as usize])
.unwrap();
assert!(n_charge < 0.0, "amine N charge = {n_charge:.3}, expected negative");
} else {
for &hq in &h_charges {
assert!(hq > 0.05, "H-N charge = {hq:.3}, expected positive");
}
}
}
#[test]
fn pbci_table_has_99_entries() {
assert_eq!(MMFF94_PBCI.len(), 99);
}
#[test]
fn chg_table_has_498_entries() {
assert_eq!(MMFF94_CHG.len(), 498);
}
#[test]
fn glycine_h_types_correct() {
let m = mol("[NH2]CC(=O)O");
let types = assign_mmff94_numeric_types(&m).unwrap();
for i in 0..m.atom_count() {
let a = m.atom(AtomIdx(i as u32));
if a.element == Element::H {
let t = types[i];
assert!(
matches!(t, 5 | 23 | 24),
"H type should be 5/23/24, got {t}"
);
}
}
}
#[test]
fn furan_o_is_type_43() {
let m = mol("o1cccc1"); let types_result = assign_mmff94_numeric_types(&m);
if let Ok(types) = types_result {
for i in 0..m.atom_count() {
if m.atom(AtomIdx(i as u32)).element == Element::O {
let t = types[i];
assert!(matches!(t, 43 | 6), "furan O type = {t}");
}
}
}
}
}