// cosmolkit_core/model/sgroup.rs

use std::collections::BTreeMap;

use crate::{AtomId, BondId};

/// Identity of a substance group (SGroup) inside a molecule.
///
/// RDKit SDF/MolBlock parsing can produce SGroups before they are interpreted
/// by higher-level chemistry operations. Keep this as explicit molecule state;
/// do not flatten SGroup data into string properties without human-author
/// approval, because that loses atom/bond membership and processing semantics.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SubstanceGroupId(usize);

impl SubstanceGroupId {
    /// Wraps a raw index as a substance-group identifier.
    #[must_use]
    pub const fn new(index: usize) -> Self {
        Self(index)
    }

    /// Returns the raw index this identifier was built from.
    #[must_use]
    pub const fn index(self) -> usize {
        self.0
    }
}

/// Kind of a substance group.
///
/// NOTE(review): the variant names look like the CTfile/MolBlock SGroup type
/// codes (SUP, MUL, SRU, ...); confirm the exact mapping against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstanceGroupKind {
    Data,
    Superatom,
    MultipleGroup,
    StructuralRepeatUnit,
    Monomer,
    Copolymer,
    Crosslink,
    Graft,
    Modification,
    Mer,
    AnyPolymer,
    MixtureComponent,
    Mixture,
    Formulation,
    /// Kind carrying a free-form string payload.
    ///
    /// NOTE(review): whether the payload is a sub-classification or an
    /// unrecognized raw type code is not visible here; confirm at the caller.
    Generic(String),
}

/// Role a bond plays with respect to a substance group.
///
/// NOTE(review): presumably mirrors the CTfile crossing-bond / contained-bond
/// distinction; confirm against the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SGroupBondRole {
    Crossing,
    Contained,
}

/// One display bracket, stored as two 2-D points.
///
/// NOTE(review): `p1`/`p2` are presumably the two bracket end points in
/// drawing coordinates; confirm units and orientation with the writer.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SGroupBracket {
    pub p1: [f64; 2],
    pub p2: [f64; 2],
}

57#[derive(Debug, Clone, Copy, PartialEq)]
58pub struct SGroupCState {
59    pub bond: BondId,
60    pub vector: [f64; 2],
61}
62
63#[derive(Debug, Clone, PartialEq, Default)]
64pub struct SGroupDisplay {
65    pub brackets: Vec<SGroupBracket>,
66    pub field_position: Option<[f64; 2]>,
67    pub display_tag: Option<String>,
68}
69
/// Data-field description and values attached to a substance group.
///
/// Every descriptor is optional and kept as the raw string; `Default` yields
/// all descriptors `None` and an empty `values` list.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SGroupData {
    pub field_name: Option<String>,
    pub field_type: Option<String>,
    pub field_info: Option<String>,
    pub field_display: Option<String>,
    pub units: Option<String>,
    pub query_type: Option<String>,
    pub query_op: Option<String>,
    /// Data values, in stored order.
    pub values: Vec<String>,
}

/// Connection type recorded on a substance group.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SGroupConnection {
    HeadToHead,
    HeadToTail,
    Either,
    /// Connection carried as a raw string.
    ///
    /// NOTE(review): presumably the unrecognized source value; confirm.
    Unknown(String),
}

/// Bracket style recorded on a substance group.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SGroupBracketStyle {
    Bracket,
    Parenthesis,
    /// An explicit "none" style; distinct from the style being absent, which
    /// callers model as `Option::None` around this enum.
    None,
    /// Style carried as a raw string.
    ///
    /// NOTE(review): presumably the unrecognized source value; confirm.
    Unknown(String),
}

98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct SGroupAttachPoint {
100    pub atom: AtomId,
101    pub leaving_atom: Option<AtomId>,
102    pub label: Option<String>,
103    pub order: Option<u32>,
104}
105
106#[derive(Debug, Clone, PartialEq)]
107pub struct SubstanceGroup {
108    id: SubstanceGroupId,
109    rdkit_sequence_id: Option<u32>,
110    external_id: Option<u32>,
111    kind: SubstanceGroupKind,
112    atoms: Vec<AtomId>,
113    bonds: Vec<BondId>,
114    bond_roles: BTreeMap<BondId, SGroupBondRole>,
115    parent_atoms: Vec<AtomId>,
116    parent: Option<SubstanceGroupId>,
117    label: Option<String>,
118    connection: Option<SGroupConnection>,
119    subtype: Option<String>,
120    bracket_style: Option<SGroupBracketStyle>,
121    expansion_state: Option<String>,
122    class: Option<String>,
123    component_number: Option<u32>,
124    display: Option<SGroupDisplay>,
125    data: Option<SGroupData>,
126    attach_points: Vec<SGroupAttachPoint>,
127    cstates: Vec<SGroupCState>,
128    props: BTreeMap<String, String>,
129    data_fields: Vec<String>,
130}
131
132impl SubstanceGroup {
133    #[must_use]
134    pub fn new(id: SubstanceGroupId, kind: SubstanceGroupKind) -> Self {
135        Self {
136            id,
137            rdkit_sequence_id: None,
138            external_id: None,
139            kind,
140            atoms: Vec::new(),
141            bonds: Vec::new(),
142            bond_roles: BTreeMap::new(),
143            parent_atoms: Vec::new(),
144            parent: None,
145            label: None,
146            connection: None,
147            subtype: None,
148            bracket_style: None,
149            expansion_state: None,
150            class: None,
151            component_number: None,
152            display: None,
153            data: None,
154            attach_points: Vec::new(),
155            cstates: Vec::new(),
156            props: BTreeMap::new(),
157            data_fields: Vec::new(),
158        }
159    }
160
161    #[must_use]
162    pub const fn id(&self) -> SubstanceGroupId {
163        self.id
164    }
165
166    #[must_use]
167    pub const fn external_id(&self) -> Option<u32> {
168        self.external_id
169    }
170
171    #[must_use]
172    pub const fn rdkit_sequence_id(&self) -> Option<u32> {
173        self.rdkit_sequence_id
174    }
175
176    #[must_use]
177    pub const fn kind(&self) -> &SubstanceGroupKind {
178        &self.kind
179    }
180
181    #[must_use]
182    pub fn atoms(&self) -> &[AtomId] {
183        &self.atoms
184    }
185
186    #[must_use]
187    pub fn bonds(&self) -> &[BondId] {
188        &self.bonds
189    }
190
191    #[must_use]
192    pub fn bond_role(&self, bond: BondId) -> SGroupBondRole {
193        self.bond_roles
194            .get(&bond)
195            .copied()
196            .unwrap_or(SGroupBondRole::Crossing)
197    }
198
199    #[must_use]
200    pub fn parent_atoms(&self) -> &[AtomId] {
201        &self.parent_atoms
202    }
203
204    #[must_use]
205    pub const fn parent(&self) -> Option<SubstanceGroupId> {
206        self.parent
207    }
208
209    #[must_use]
210    pub fn props(&self) -> &BTreeMap<String, String> {
211        &self.props
212    }
213
214    #[must_use]
215    pub fn data_fields(&self) -> &[String] {
216        &self.data_fields
217    }
218
219    #[must_use]
220    pub fn label(&self) -> Option<&str> {
221        self.label.as_deref()
222    }
223
224    #[must_use]
225    pub const fn connection(&self) -> Option<&SGroupConnection> {
226        self.connection.as_ref()
227    }
228
229    #[must_use]
230    pub fn subtype(&self) -> Option<&str> {
231        self.subtype.as_deref()
232    }
233
234    #[must_use]
235    pub const fn bracket_style(&self) -> Option<&SGroupBracketStyle> {
236        self.bracket_style.as_ref()
237    }
238
239    #[must_use]
240    pub const fn display(&self) -> Option<&SGroupDisplay> {
241        self.display.as_ref()
242    }
243
244    #[must_use]
245    pub fn expansion_state(&self) -> Option<&str> {
246        self.expansion_state.as_deref()
247    }
248
249    #[must_use]
250    pub fn class(&self) -> Option<&str> {
251        self.class.as_deref()
252    }
253
254    #[must_use]
255    pub const fn component_number(&self) -> Option<u32> {
256        self.component_number
257    }
258
259    #[must_use]
260    pub const fn data(&self) -> Option<&SGroupData> {
261        self.data.as_ref()
262    }
263
264    #[must_use]
265    pub fn attach_points(&self) -> &[SGroupAttachPoint] {
266        &self.attach_points
267    }
268
269    #[must_use]
270    pub fn cstates(&self) -> &[SGroupCState] {
271        &self.cstates
272    }
273
274    #[must_use]
275    pub const fn with_rdkit_sequence_id(mut self, rdkit_sequence_id: u32) -> Self {
276        self.rdkit_sequence_id = Some(rdkit_sequence_id);
277        self
278    }
279
280    #[must_use]
281    pub const fn with_external_id(mut self, external_id: u32) -> Self {
282        self.external_id = Some(external_id);
283        self
284    }
285
286    #[must_use]
287    pub const fn with_parent(mut self, parent: SubstanceGroupId) -> Self {
288        self.parent = Some(parent);
289        self
290    }
291
292    #[must_use]
293    pub fn with_atoms(mut self, atoms: Vec<AtomId>) -> Self {
294        self.atoms = atoms;
295        self
296    }
297
298    #[must_use]
299    pub fn with_bonds(mut self, bonds: Vec<BondId>) -> Self {
300        self.bond_roles
301            .retain(|bond, _| bonds.iter().any(|candidate| candidate == bond));
302        self.bonds = bonds;
303        self
304    }
305
306    #[must_use]
307    pub fn with_bond_role(mut self, bond: BondId, role: SGroupBondRole) -> Self {
308        if self.bonds.iter().any(|candidate| *candidate == bond) {
309            self.bond_roles.insert(bond, role);
310        }
311        self
312    }
313
314    #[must_use]
315    pub fn with_parent_atoms(mut self, parent_atoms: Vec<AtomId>) -> Self {
316        self.parent_atoms = parent_atoms;
317        self
318    }
319
320    #[must_use]
321    pub fn with_label(mut self, label: impl Into<String>) -> Self {
322        self.label = Some(label.into());
323        self
324    }
325
326    #[must_use]
327    pub fn with_connection(mut self, connection: SGroupConnection) -> Self {
328        self.connection = Some(connection);
329        self
330    }
331
332    #[must_use]
333    pub fn with_subtype(mut self, subtype: impl Into<String>) -> Self {
334        self.subtype = Some(subtype.into());
335        self
336    }
337
338    #[must_use]
339    pub fn with_bracket_style(mut self, bracket_style: SGroupBracketStyle) -> Self {
340        self.bracket_style = Some(bracket_style);
341        self
342    }
343
344    #[must_use]
345    pub fn with_display(mut self, display: SGroupDisplay) -> Self {
346        self.display = Some(display);
347        self
348    }
349
350    #[must_use]
351    pub fn with_expansion_state(mut self, expansion_state: impl Into<String>) -> Self {
352        self.expansion_state = Some(expansion_state.into());
353        self
354    }
355
356    #[must_use]
357    pub fn with_class(mut self, class: impl Into<String>) -> Self {
358        self.class = Some(class.into());
359        self
360    }
361
362    #[must_use]
363    pub const fn with_component_number(mut self, component_number: u32) -> Self {
364        self.component_number = Some(component_number);
365        self
366    }
367
368    #[must_use]
369    pub fn with_data(mut self, data: SGroupData) -> Self {
370        self.data = Some(data);
371        self
372    }
373
374    #[must_use]
375    pub fn with_attach_points(mut self, attach_points: Vec<SGroupAttachPoint>) -> Self {
376        self.attach_points = attach_points;
377        self
378    }
379
380    #[must_use]
381    pub fn with_cstates(mut self, cstates: Vec<SGroupCState>) -> Self {
382        self.cstates = cstates;
383        self
384    }
385
386    #[must_use]
387    pub fn with_prop(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
388        self.props.insert(key.into(), value.into());
389        self
390    }
391
392    #[must_use]
393    pub fn with_data_field(mut self, value: impl Into<String>) -> Self {
394        self.data_fields.push(value.into());
395        self
396    }
397
398    pub(crate) fn push_data_field(&mut self, value: impl Into<String>) {
399        self.data_fields.push(value.into());
400    }
401
402    pub(crate) fn push_atom(&mut self, atom: AtomId) {
403        self.atoms.push(atom);
404    }
405
406    pub(crate) fn push_bond(&mut self, bond: BondId) {
407        self.push_bond_with_role(bond, SGroupBondRole::Crossing);
408    }
409
410    pub(crate) fn push_bond_with_role(&mut self, bond: BondId, role: SGroupBondRole) {
411        self.bonds.push(bond);
412        if role != SGroupBondRole::Crossing {
413            self.bond_roles.insert(bond, role);
414        }
415    }
416
417    pub(crate) fn remove_atom(&mut self, atom: AtomId) {
418        self.atoms.retain(|candidate| *candidate != atom);
419    }
420
421    pub(crate) fn remove_bond(&mut self, bond: BondId) {
422        self.bonds.retain(|candidate| *candidate != bond);
423        self.bond_roles.remove(&bond);
424    }
425
426    pub(crate) fn remove_parent_atom(&mut self, atom: AtomId) {
427        self.parent_atoms.retain(|candidate| *candidate != atom);
428    }
429
430    pub(crate) fn clear_attach_point_leaving_atom(&mut self, atom: AtomId) {
431        for attach_point in &mut self.attach_points {
432            if attach_point.leaving_atom == Some(atom) {
433                attach_point.leaving_atom = None;
434            }
435        }
436    }
437
438    #[allow(dead_code)]
439    pub(crate) fn push_parent_atom(&mut self, atom: AtomId) {
440        self.parent_atoms.push(atom);
441    }
442
443    pub(crate) fn set_external_id(&mut self, external_id: u32) {
444        self.external_id = Some(external_id);
445    }
446
447    #[allow(dead_code)]
448    pub(crate) fn set_parent(&mut self, parent: SubstanceGroupId) {
449        self.parent = Some(parent);
450    }
451
452    #[allow(dead_code)]
453    pub(crate) fn set_rdkit_sequence_id(&mut self, rdkit_sequence_id: u32) {
454        self.rdkit_sequence_id = Some(rdkit_sequence_id);
455    }
456
457    pub(crate) fn set_id(&mut self, id: SubstanceGroupId) {
458        self.id = id;
459    }
460
461    #[allow(dead_code)]
462    pub(crate) fn set_label(&mut self, label: impl Into<String>) {
463        self.label = Some(label.into());
464    }
465
466    #[allow(dead_code)]
467    pub(crate) fn set_connection(&mut self, connection: SGroupConnection) {
468        self.connection = Some(connection);
469    }
470
471    #[allow(dead_code)]
472    pub(crate) fn set_subtype(&mut self, subtype: impl Into<String>) {
473        self.subtype = Some(subtype.into());
474    }
475
476    #[allow(dead_code)]
477    pub(crate) fn set_bracket_style(&mut self, bracket_style: SGroupBracketStyle) {
478        self.bracket_style = Some(bracket_style);
479    }
480
481    #[allow(dead_code)]
482    pub(crate) fn set_expansion_state(&mut self, expansion_state: impl Into<String>) {
483        self.expansion_state = Some(expansion_state.into());
484    }
485
486    #[allow(dead_code)]
487    pub(crate) fn set_class(&mut self, class: impl Into<String>) {
488        self.class = Some(class.into());
489    }
490
491    #[allow(dead_code)]
492    pub(crate) fn set_component_number(&mut self, component_number: u32) {
493        self.component_number = Some(component_number);
494    }
495
496    #[allow(dead_code)]
497    pub(crate) fn display_mut(&mut self) -> &mut SGroupDisplay {
498        self.display.get_or_insert_with(SGroupDisplay::default)
499    }
500
501    #[allow(dead_code)]
502    pub(crate) fn data_mut(&mut self) -> &mut SGroupData {
503        self.data.get_or_insert_with(SGroupData::default)
504    }
505
506    #[allow(dead_code)]
507    pub(crate) fn push_attach_point(&mut self, attach_point: SGroupAttachPoint) {
508        self.attach_points.push(attach_point);
509    }
510
511    #[allow(dead_code)]
512    pub(crate) fn push_cstate(&mut self, cstate: SGroupCState) {
513        self.cstates.push(cstate);
514    }
515
516    pub(crate) fn set_prop(&mut self, key: impl Into<String>, value: impl Into<String>) {
517        self.props.insert(key.into(), value.into());
518    }
519
520    pub(crate) fn clear_prop(&mut self, key: &str) {
521        self.props.remove(key);
522    }
523
524    pub(crate) fn can_remap_without_parent(
525        &self,
526        atom_map: &[Option<AtomId>],
527        bond_map: &[Option<BondId>],
528    ) -> bool {
529        self.atoms
530            .iter()
531            .all(|atom| atom_map.get(atom.index()).is_some_and(Option::is_some))
532            && self
533                .bonds
534                .iter()
535                .all(|bond| bond_map.get(bond.index()).is_some_and(Option::is_some))
536            && self
537                .parent_atoms
538                .iter()
539                .all(|atom| atom_map.get(atom.index()).is_some_and(Option::is_some))
540            && self.attach_points.iter().all(|attach_point| {
541                atom_map
542                    .get(attach_point.atom.index())
543                    .is_some_and(Option::is_some)
544                    && attach_point.leaving_atom.is_none_or(|leaving_atom| {
545                        atom_map
546                            .get(leaving_atom.index())
547                            .is_some_and(Option::is_some)
548                    })
549            })
550            && self.cstates.iter().all(|cstate| {
551                bond_map
552                    .get(cstate.bond.index())
553                    .is_some_and(Option::is_some)
554            })
555    }
556
557    pub(crate) fn remapped(
558        &self,
559        id: SubstanceGroupId,
560        atom_map: &[Option<AtomId>],
561        bond_map: &[Option<BondId>],
562        sgroup_map: &[Option<SubstanceGroupId>],
563    ) -> Option<Self> {
564        let atoms: Option<Vec<_>> = self
565            .atoms
566            .iter()
567            .map(|atom| atom_map.get(atom.index()).and_then(|x| *x))
568            .collect();
569        let bonds: Option<Vec<_>> = self
570            .bonds
571            .iter()
572            .map(|bond| bond_map.get(bond.index()).and_then(|x| *x))
573            .collect();
574        let parent_atoms: Option<Vec<_>> = self
575            .parent_atoms
576            .iter()
577            .map(|atom| atom_map.get(atom.index()).and_then(|x| *x))
578            .collect();
579        let parent = match self.parent {
580            Some(parent) => sgroup_map.get(parent.index()).and_then(|x| *x),
581            None => None,
582        };
583        if self.parent.is_some() && parent.is_none() {
584            return None;
585        }
586        let attach_points: Option<Vec<_>> = self
587            .attach_points
588            .iter()
589            .map(|attach_point| {
590                let atom = atom_map.get(attach_point.atom.index()).and_then(|x| *x)?;
591                let leaving_atom = match attach_point.leaving_atom {
592                    Some(leaving_atom) => {
593                        Some(atom_map.get(leaving_atom.index()).and_then(|x| *x)?)
594                    }
595                    None => None,
596                };
597                Some(SGroupAttachPoint {
598                    atom,
599                    leaving_atom,
600                    label: attach_point.label.clone(),
601                    order: attach_point.order,
602                })
603            })
604            .collect();
605        let cstates: Option<Vec<_>> = self
606            .cstates
607            .iter()
608            .map(|cstate| {
609                let bond = bond_map.get(cstate.bond.index()).and_then(|x| *x)?;
610                Some(SGroupCState {
611                    bond,
612                    vector: cstate.vector,
613                })
614            })
615            .collect();
616        let mut bond_roles = BTreeMap::new();
617        for (old_bond, role) in &self.bond_roles {
618            let new_bond = bond_map.get(old_bond.index()).and_then(|x| *x)?;
619            if *role != SGroupBondRole::Crossing {
620                bond_roles.insert(new_bond, *role);
621            }
622        }
623        Some(Self {
624            id,
625            rdkit_sequence_id: self.rdkit_sequence_id,
626            external_id: self.external_id,
627            kind: self.kind.clone(),
628            atoms: atoms?,
629            bonds: bonds?,
630            bond_roles,
631            parent_atoms: parent_atoms?,
632            parent,
633            label: self.label.clone(),
634            connection: self.connection.clone(),
635            subtype: self.subtype.clone(),
636            bracket_style: self.bracket_style.clone(),
637            expansion_state: self.expansion_state.clone(),
638            class: self.class.clone(),
639            component_number: self.component_number,
640            display: self.display.clone(),
641            data: self.data.clone(),
642            attach_points: attach_points?,
643            cstates: cstates?,
644            props: self.props.clone(),
645            data_fields: self.data_fields.clone(),
646        })
647    }
648}