Skip to main content

inchi/
molecule.rs

1//! Programmatic structure input: build a [`Molecule`] from atoms, bonds, and
2//! 0D stereo, then generate its InChI via the native `GetINCHI` entry point.
3
4use crate::error::{InchiError, Result};
5use crate::options::Options;
6use crate::output::InchiOutput;
7
8/// Maximum number of atoms accepted by the InChI library (`MAX_ATOMS`).
9const MAX_ATOMS: usize = 1024;
10/// Maximum bonds recordable per atom in the FFI struct (`MAXVAL`).
11const MAX_BONDS_PER_ATOM: usize = inchi_sys::MAXVAL as usize;
12/// Capacity of the element-symbol field, including the trailing NUL.
13const ELNAME_CAP: usize = inchi_sys::ATOM_EL_LEN as usize;
14
15/// The unpaired-electron (radical) state of an atom.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
17#[non_exhaustive]
18pub enum Radical {
19    /// No radical.
20    #[default]
21    None,
22    /// Singlet (carbene-like).
23    Singlet,
24    /// Doublet (one unpaired electron).
25    Doublet,
26    /// Triplet (two unpaired electrons).
27    Triplet,
28}
29
30impl Radical {
31    fn code(self) -> i8 {
32        let v = match self {
33            Radical::None => inchi_sys::INCHI_RADICAL_NONE,
34            Radical::Singlet => inchi_sys::INCHI_RADICAL_SINGLET,
35            Radical::Doublet => inchi_sys::INCHI_RADICAL_DOUBLET,
36            Radical::Triplet => inchi_sys::INCHI_RADICAL_TRIPLET,
37        };
38        v as i8
39    }
40}
41
42/// The order of a covalent bond.
43#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
44#[non_exhaustive]
45pub enum BondOrder {
46    /// A single bond.
47    #[default]
48    Single,
49    /// A double bond.
50    Double,
51    /// A triple bond.
52    Triple,
53    /// An "alternating"/aromatic bond. The InChI documentation recommends
54    /// avoiding this in favor of explicit single/double bonds.
55    Alternating,
56}
57
58impl BondOrder {
59    fn code(self) -> i8 {
60        let v = match self {
61            BondOrder::Single => inchi_sys::INCHI_BOND_TYPE_SINGLE,
62            BondOrder::Double => inchi_sys::INCHI_BOND_TYPE_DOUBLE,
63            BondOrder::Triple => inchi_sys::INCHI_BOND_TYPE_TRIPLE,
64            BondOrder::Alternating => inchi_sys::INCHI_BOND_TYPE_ALTERN,
65        };
66        v as i8
67    }
68}
69
/// Implicit-hydrogen policy for an atom.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum ImplicitH {
    /// The library adds as many implicit hydrogens as normal valence requires
    /// (encoded as `num_iso_H[0] = -1`). This is the usual choice for a
    /// heavy-atom skeleton and mirrors molfile behavior.
    #[default]
    Auto,
    /// The atom carries exactly this many implicit (non-isotopic) hydrogens.
    Exactly(u8),
}
82
83/// A 0D stereo parity (used when no coordinates disambiguate the geometry).
84#[derive(Debug, Clone, Copy, PartialEq, Eq)]
85#[non_exhaustive]
86pub enum Parity {
87    /// Odd parity (`'-'` / `'o'`).
88    Odd,
89    /// Even parity (`'+'` / `'e'`).
90    Even,
91    /// Known to be stereogenic but of unspecified configuration (`'u'`).
92    Unknown,
93}
94
95impl Parity {
96    fn code(self) -> i8 {
97        let v = match self {
98            Parity::Odd => inchi_sys::INCHI_PARITY_ODD,
99            Parity::Even => inchi_sys::INCHI_PARITY_EVEN,
100            Parity::Unknown => inchi_sys::INCHI_PARITY_UNKNOWN,
101        };
102        v as i8
103    }
104}
105
106/// A single 0D stereo element, referencing atoms by their index in the
107/// [`Molecule`].
108///
109/// The neighbor ordering follows the InChI convention exactly; getting it wrong
110/// flips the parity, so validate against known identifiers. See the upstream
111/// `inchi_api.h` for the precise diagrams.
112#[derive(Debug, Clone, Copy, PartialEq, Eq)]
113#[non_exhaustive]
114pub enum Stereo {
115    /// A tetrahedral stereocenter. `neighbors` lists the four substituents in
116    /// the order whose handedness defines `parity`, seen from the first
117    /// neighbor toward `center`.
118    Tetrahedral {
119        /// Index of the central atom.
120        center: usize,
121        /// The four neighbor atom indices, in convention order.
122        neighbors: [usize; 4],
123        /// The parity of the configuration.
124        parity: Parity,
125    },
126    /// A stereogenic double bond `>A=B<` (or even-length cumulene). `ends` is
127    /// `[X, A, B, Y]` where `A=B` is the double bond and `X`, `Y` are the
128    /// reference substituents.
129    DoubleBond {
130        /// `[X, A, B, Y]` atom indices.
131        ends: [usize; 4],
132        /// The parity of the configuration.
133        parity: Parity,
134    },
135    /// An allene / odd-length cumulene stereocenter. `ends` is `[X, A, B, Y]`
136    /// and `center` is the central cumulene atom.
137    Allene {
138        /// Index of the central atom.
139        center: usize,
140        /// `[X, A, B, Y]` atom indices.
141        ends: [usize; 4],
142        /// The parity of the configuration.
143        parity: Parity,
144    },
145}
146
147/// A single atom in a [`Molecule`].
148///
149/// Construct with [`Atom::new`] and refine with the chainable setters.
150///
151/// ```
152/// use inchi::{Atom, Radical, ImplicitH};
153///
154/// let carbon = Atom::new("C").position(0.0, 0.0, 0.0);
155/// let chloride = Atom::new("Cl").charge(-1).implicit_hydrogens(ImplicitH::Exactly(0));
156/// let _ = (carbon, chloride, Radical::None);
157/// ```
158#[derive(Debug, Clone, PartialEq)]
159pub struct Atom {
160    element: String,
161    x: f64,
162    y: f64,
163    z: f64,
164    charge: i8,
165    isotope: Option<u16>,
166    radical: Radical,
167    implicit_h: ImplicitH,
168}
169
170impl Atom {
171    /// Creates an atom of the given element (e.g. `"C"`, `"Cl"`, `"Na"`).
172    ///
173    /// The symbol is validated when the molecule is converted; an empty,
174    /// non-ASCII, or over-long symbol yields [`InchiError::InvalidStructure`].
175    ///
176    /// ```
177    /// use inchi::Atom;
178    /// let _ = Atom::new("O");
179    /// ```
180    #[must_use]
181    pub fn new(element: impl Into<String>) -> Self {
182        Atom {
183            element: element.into(),
184            x: 0.0,
185            y: 0.0,
186            z: 0.0,
187            charge: 0,
188            isotope: None,
189            radical: Radical::None,
190            implicit_h: ImplicitH::Auto,
191        }
192    }
193
194    /// Sets the 3D coordinates of the atom (defaults to the origin).
195    ///
196    /// ```
197    /// use inchi::Atom;
198    /// let _ = Atom::new("C").position(1.0, 0.5, -0.25);
199    /// ```
200    #[must_use]
201    pub fn position(mut self, x: f64, y: f64, z: f64) -> Self {
202        self.x = x;
203        self.y = y;
204        self.z = z;
205        self
206    }
207
208    /// Sets the formal charge (defaults to `0`).
209    ///
210    /// ```
211    /// use inchi::Atom;
212    /// let _ = Atom::new("N").charge(1);
213    /// ```
214    #[must_use]
215    pub fn charge(mut self, charge: i8) -> Self {
216        self.charge = charge;
217        self
218    }
219
220    /// Sets the absolute isotopic mass (e.g. `13` for carbon-13). Omit for the
221    /// natural isotopic composition.
222    ///
223    /// ```
224    /// use inchi::Atom;
225    /// let _ = Atom::new("C").isotope(13);
226    /// ```
227    #[must_use]
228    pub fn isotope(mut self, mass: u16) -> Self {
229        self.isotope = Some(mass);
230        self
231    }
232
233    /// Sets the radical state (defaults to [`Radical::None`]).
234    ///
235    /// ```
236    /// use inchi::{Atom, Radical};
237    /// let _ = Atom::new("C").radical(Radical::Triplet);
238    /// ```
239    #[must_use]
240    pub fn radical(mut self, radical: Radical) -> Self {
241        self.radical = radical;
242        self
243    }
244
245    /// Sets how implicit hydrogens are handled (defaults to [`ImplicitH::Auto`]).
246    ///
247    /// ```
248    /// use inchi::{Atom, ImplicitH};
249    /// let _ = Atom::new("C").implicit_hydrogens(ImplicitH::Exactly(3));
250    /// ```
251    #[must_use]
252    pub fn implicit_hydrogens(mut self, h: ImplicitH) -> Self {
253        self.implicit_h = h;
254        self
255    }
256}
257
258/// A molecular structure assembled programmatically from atoms, bonds, and 0D
259/// stereo descriptors.
260///
261/// Atoms are referenced by the index returned from [`Molecule::add_atom`] (also
262/// the order in which they are added, starting at `0`).
263///
264/// ```
265/// use inchi::{Molecule, Atom, BondOrder};
266///
267/// // Ethanol: C-C-O (implicit hydrogens added automatically).
268/// let mut mol = Molecule::new();
269/// let c1 = mol.add_atom(Atom::new("C"));
270/// let c2 = mol.add_atom(Atom::new("C"));
271/// let o = mol.add_atom(Atom::new("O"));
272/// mol.add_bond(c1, c2, BondOrder::Single)?;
273/// mol.add_bond(c2, o, BondOrder::Single)?;
274///
275/// let out = mol.to_inchi(())?;
276/// assert_eq!(out.inchi(), "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3");
277/// # Ok::<(), inchi::InchiError>(())
278/// ```
279#[derive(Debug, Clone, Default, PartialEq)]
280pub struct Molecule {
281    atoms: Vec<Atom>,
282    bonds: Vec<(usize, usize, BondOrder)>,
283    stereo: Vec<Stereo>,
284    polymer_units: Vec<crate::polymer::PolymerUnit>,
285}
286
287impl Molecule {
288    /// Creates an empty molecule.
289    ///
290    /// ```
291    /// use inchi::Molecule;
292    /// let mol = Molecule::new();
293    /// assert_eq!(mol.atom_count(), 0);
294    /// ```
295    #[must_use]
296    pub fn new() -> Self {
297        Molecule::default()
298    }
299
300    /// Adds an atom and returns its index.
301    ///
302    /// ```
303    /// use inchi::{Molecule, Atom};
304    /// let mut mol = Molecule::new();
305    /// assert_eq!(mol.add_atom(Atom::new("C")), 0);
306    /// assert_eq!(mol.add_atom(Atom::new("O")), 1);
307    /// ```
308    pub fn add_atom(&mut self, atom: Atom) -> usize {
309        self.atoms.push(atom);
310        self.atoms.len() - 1
311    }
312
313    /// Adds a bond between two existing atoms.
314    ///
315    /// Returns [`InchiError::InvalidStructure`] if either index is out of range
316    /// or if `a == b`.
317    ///
318    /// ```
319    /// use inchi::{Molecule, Atom, BondOrder};
320    /// let mut mol = Molecule::new();
321    /// let a = mol.add_atom(Atom::new("C"));
322    /// let b = mol.add_atom(Atom::new("O"));
323    /// mol.add_bond(a, b, BondOrder::Double)?;
324    /// assert!(mol.add_bond(a, 99, BondOrder::Single).is_err());
325    /// # Ok::<(), inchi::InchiError>(())
326    /// ```
327    pub fn add_bond(&mut self, a: usize, b: usize, order: BondOrder) -> Result<()> {
328        if a == b {
329            return Err(InchiError::InvalidStructure {
330                reason: format!("bond connects atom {a} to itself"),
331            });
332        }
333        let n = self.atoms.len();
334        if a >= n || b >= n {
335            return Err(InchiError::InvalidStructure {
336                reason: format!("bond ({a}, {b}) references a nonexistent atom (have {n})"),
337            });
338        }
339        self.bonds.push((a, b, order));
340        Ok(())
341    }
342
343    /// Adds a 0D stereo descriptor. Indices are validated at conversion time.
344    ///
345    /// ```
346    /// use inchi::{Molecule, Atom, Stereo, Parity};
347    /// let mut mol = Molecule::new();
348    /// for el in ["C", "C", "N", "O"] { mol.add_atom(Atom::new(el)); }
349    /// mol.add_stereo(Stereo::Tetrahedral { center: 0, neighbors: [1, 2, 3, 0], parity: Parity::Odd });
350    /// assert_eq!(mol.stereo_count(), 1);
351    /// ```
352    pub fn add_stereo(&mut self, stereo: Stereo) {
353        self.stereo.push(stereo);
354    }
355
356    /// Adds a polymer structural repeating unit, switching InChI generation to
357    /// the extended `GetINCHIEx` entry point.
358    ///
359    /// Polymers require the [`Options::polymers`](crate::Options::polymers)
360    /// flag to be set and yield a non-standard, beta-flagged InChI. The unit's
361    /// atom indices refer to atoms already added to this molecule; the two
362    /// chain ends are normally capped with `"Zz"` star atoms.
363    ///
364    /// ```
365    /// use inchi::{Molecule, Atom, BondOrder, Options, Polymers, PolymerUnit};
366    /// // A polyethylene repeat unit: *-CH2-CH2-*
367    /// let mut mol = Molecule::new();
368    /// let s1 = mol.add_atom(Atom::new("Zz"));
369    /// let c1 = mol.add_atom(Atom::new("C"));
370    /// let c2 = mol.add_atom(Atom::new("C"));
371    /// let s2 = mol.add_atom(Atom::new("Zz"));
372    /// mol.add_bond(s1, c1, BondOrder::Single)?;
373    /// mol.add_bond(c1, c2, BondOrder::Single)?;
374    /// mol.add_bond(c2, s2, BondOrder::Single)?;
375    /// mol.add_polymer_unit(PolymerUnit::sru([c1, c2], [[s1, c1], [c2, s2]]));
376    /// let inchi = mol.to_inchi(Options::new().polymers(Polymers::On))?.into_inchi();
377    /// assert!(inchi.contains("/z"));
378    /// # Ok::<(), inchi::InchiError>(())
379    /// ```
380    pub fn add_polymer_unit(&mut self, unit: crate::polymer::PolymerUnit) {
381        self.polymer_units.push(unit);
382    }
383
384    /// The number of atoms.
385    ///
386    /// ```
387    /// # use inchi::{Molecule, Atom};
388    /// let mut mol = Molecule::new();
389    /// mol.add_atom(Atom::new("C"));
390    /// assert_eq!(mol.atom_count(), 1);
391    /// ```
392    #[must_use]
393    pub fn atom_count(&self) -> usize {
394        self.atoms.len()
395    }
396
397    /// The number of bonds.
398    ///
399    /// ```
400    /// # use inchi::{Molecule, Atom, BondOrder};
401    /// let mut mol = Molecule::new();
402    /// let a = mol.add_atom(Atom::new("C"));
403    /// let b = mol.add_atom(Atom::new("C"));
404    /// mol.add_bond(a, b, BondOrder::Single)?;
405    /// assert_eq!(mol.bond_count(), 1);
406    /// # Ok::<(), inchi::InchiError>(())
407    /// ```
408    #[must_use]
409    pub fn bond_count(&self) -> usize {
410        self.bonds.len()
411    }
412
413    /// The number of 0D stereo descriptors.
414    ///
415    /// ```
416    /// # use inchi::Molecule;
417    /// assert_eq!(Molecule::new().stereo_count(), 0);
418    /// ```
419    #[must_use]
420    pub fn stereo_count(&self) -> usize {
421        self.stereo.len()
422    }
423
424    /// Generates the InChI for this molecule using the given [`Options`].
425    ///
426    /// ```
427    /// use inchi::{Molecule, Atom};
428    /// // A lone oxygen atom becomes water once implicit H are added.
429    /// let mut mol = Molecule::new();
430    /// mol.add_atom(Atom::new("O"));
431    /// assert_eq!(mol.to_inchi(())?.inchi(), "InChI=1S/H2O/h1H2");
432    /// # Ok::<(), inchi::InchiError>(())
433    /// ```
434    pub fn to_inchi(&self, options: impl Into<Options>) -> Result<InchiOutput> {
435        let options = options.into();
436        let mut atoms = self.build_atoms()?;
437        let mut stereo = self.build_stereo()?;
438        let opts = crate::raw::to_cstring(&options.to_arg_string())?;
439
440        let num_atoms = i16::try_from(atoms.len()).map_err(|_| InchiError::InvalidStructure {
441            reason: format!("too many atoms ({})", atoms.len()),
442        })?;
443        let num_stereo = i16::try_from(stereo.len()).map_err(|_| InchiError::InvalidStructure {
444            reason: format!("too many stereo elements ({})", stereo.len()),
445        })?;
446
447        // SAFETY: `input` borrows the `atoms`/`stereo`/`opts` allocations, all
448        // of which outlive the `GetINCHI` call below. `GetINCHI` does not take
449        // ownership of the input (the caller owns it), and we serialize via the
450        // global lock. The output is owned by an `OutputGuard` that frees it.
451        let mut input: inchi_sys::inchi_Input = unsafe { std::mem::zeroed() };
452        input.atom = atoms.as_mut_ptr();
453        input.num_atoms = num_atoms;
454        input.stereo0D = if stereo.is_empty() {
455            std::ptr::null_mut()
456        } else {
457            stereo.as_mut_ptr()
458        };
459        input.num_stereo0D = num_stereo;
460        input.szOptions = opts.as_ptr() as *mut std::os::raw::c_char;
461
462        if self.polymer_units.is_empty() {
463            let _guard = crate::raw::lock();
464            let mut out = crate::raw::OutputGuard::new();
465            let rc = unsafe { inchi_sys::GetINCHI(&mut input, out.as_mut_ptr()) };
466            // Keep the input-backing allocations alive until after the FFI call.
467            drop(atoms);
468            drop(stereo);
469            drop(opts);
470            return crate::build_output(rc, &out);
471        }
472
473        // Polymer input requires the extended `GetINCHIEx` entry point. Build
474        // the polymer block, keeping every backing allocation alive across the
475        // call. `inchi_InputEx` shares its first fields with `inchi_Input`.
476        let mut backing = PolymerBacking::build(&self.polymer_units, self.atoms.len())?;
477        let mut input_ex: inchi_sys::inchi_InputEx = unsafe { std::mem::zeroed() };
478        input_ex.atom = atoms.as_mut_ptr();
479        input_ex.num_atoms = num_atoms;
480        input_ex.stereo0D = if stereo.is_empty() {
481            std::ptr::null_mut()
482        } else {
483            stereo.as_mut_ptr()
484        };
485        input_ex.num_stereo0D = num_stereo;
486        input_ex.szOptions = opts.as_ptr() as *mut std::os::raw::c_char;
487        input_ex.polymer = backing.as_mut_ptr();
488
489        let _guard = crate::raw::lock();
490        let mut out = crate::raw::OutputGuard::new();
491        let rc = unsafe { inchi_sys::GetINCHIEx(&mut input_ex, out.as_mut_ptr()) };
492        // Keep all input-backing allocations alive until after the FFI call.
493        drop(atoms);
494        drop(stereo);
495        drop(opts);
496        drop(backing);
497        crate::build_output(rc, &out)
498    }
499
500    fn build_atoms(&self) -> Result<Vec<inchi_sys::inchi_Atom>> {
501        if self.atoms.is_empty() {
502            return Err(InchiError::InvalidStructure {
503                reason: "molecule has no atoms".to_string(),
504            });
505        }
506        if self.atoms.len() > MAX_ATOMS {
507            return Err(InchiError::InvalidStructure {
508                reason: format!("too many atoms ({} > {MAX_ATOMS})", self.atoms.len()),
509            });
510        }
511
512        let mut raw: Vec<inchi_sys::inchi_Atom> = Vec::with_capacity(self.atoms.len());
513        for atom in &self.atoms {
514            let mut a: inchi_sys::inchi_Atom = unsafe { std::mem::zeroed() };
515            a.x = atom.x;
516            a.y = atom.y;
517            a.z = atom.z;
518            write_elname(&mut a.elname, &atom.element)?;
519            a.charge = atom.charge;
520            a.radical = atom.radical.code();
521            if let Some(mass) = atom.isotope {
522                a.isotopic_mass =
523                    i16::try_from(mass).map_err(|_| InchiError::InvalidStructure {
524                        reason: format!("isotopic mass {mass} out of range"),
525                    })?;
526            }
527            a.num_iso_H = match atom.implicit_h {
528                ImplicitH::Auto => [-1, 0, 0, 0],
529                ImplicitH::Exactly(n) => [
530                    i8::try_from(n).map_err(|_| InchiError::InvalidStructure {
531                        reason: format!("implicit H count {n} out of range"),
532                    })?,
533                    0,
534                    0,
535                    0,
536                ],
537            };
538            raw.push(a);
539        }
540
541        // Populate the symmetric adjacency lists from the bond list. Each bond
542        // is recorded in both endpoints, as a molfile-derived input would be.
543        for &(a, b, order) in &self.bonds {
544            push_neighbor(&mut raw, a, b, order)?;
545            push_neighbor(&mut raw, b, a, order)?;
546        }
547
548        Ok(raw)
549    }
550
551    fn build_stereo(&self) -> Result<Vec<inchi_sys::inchi_Stereo0D>> {
552        let n = self.atoms.len();
553        let check = |idx: usize| -> Result<i16> {
554            if idx >= n {
555                return Err(InchiError::InvalidStructure {
556                    reason: format!("stereo references nonexistent atom {idx} (have {n})"),
557                });
558            }
559            i16::try_from(idx).map_err(|_| InchiError::InvalidStructure {
560                reason: format!("atom index {idx} out of range"),
561            })
562        };
563
564        let mut raw = Vec::with_capacity(self.stereo.len());
565        for stereo in &self.stereo {
566            let mut s: inchi_sys::inchi_Stereo0D = unsafe { std::mem::zeroed() };
567            match *stereo {
568                Stereo::Tetrahedral {
569                    center,
570                    neighbors,
571                    parity,
572                } => {
573                    s.central_atom = check(center)?;
574                    s.neighbor = [
575                        check(neighbors[0])?,
576                        check(neighbors[1])?,
577                        check(neighbors[2])?,
578                        check(neighbors[3])?,
579                    ];
580                    s.type_ = inchi_sys::INCHI_StereoType_Tetrahedral as i8;
581                    s.parity = parity.code();
582                }
583                Stereo::DoubleBond { ends, parity } => {
584                    s.central_atom = inchi_sys::NO_ATOM as i16;
585                    s.neighbor = [
586                        check(ends[0])?,
587                        check(ends[1])?,
588                        check(ends[2])?,
589                        check(ends[3])?,
590                    ];
591                    s.type_ = inchi_sys::INCHI_StereoType_DoubleBond as i8;
592                    s.parity = parity.code();
593                }
594                Stereo::Allene {
595                    center,
596                    ends,
597                    parity,
598                } => {
599                    s.central_atom = check(center)?;
600                    s.neighbor = [
601                        check(ends[0])?,
602                        check(ends[1])?,
603                        check(ends[2])?,
604                        check(ends[3])?,
605                    ];
606                    s.type_ = inchi_sys::INCHI_StereoType_Allene as i8;
607                    s.parity = parity.code();
608                }
609            }
610            raw.push(s);
611        }
612        Ok(raw)
613    }
614}
615
616/// Owns every heap allocation behind an [`inchi_sys::inchi_Input_Polymer`] so
617/// the C side sees stable pointers for the whole `GetINCHIEx` call.
618///
619/// Field order matters only for clarity; all pointers are taken after the
620/// backing vectors are fully populated, and the inner buffers stay put for the
621/// lifetime of the value.
622struct PolymerBacking {
623    // Per-unit 1-based atom lists (SAL) and crossing-bond lists (SBL).
624    alists: Vec<Vec<std::os::raw::c_int>>,
625    blists: Vec<Vec<std::os::raw::c_int>>,
626    // The unit structs and the array of pointers to them.
627    units: Vec<inchi_sys::inchi_Input_PolymerUnit>,
628    unit_ptrs: Vec<*mut inchi_sys::inchi_Input_PolymerUnit>,
629    polymer: inchi_sys::inchi_Input_Polymer,
630}
631
632impl PolymerBacking {
633    fn build(units_in: &[crate::polymer::PolymerUnit], num_atoms: usize) -> Result<Box<Self>> {
634        let one_based = |idx: usize| -> Result<std::os::raw::c_int> {
635            if idx >= num_atoms {
636                return Err(InchiError::InvalidStructure {
637                    reason: format!(
638                        "polymer unit references nonexistent atom {idx} (have {num_atoms})"
639                    ),
640                });
641            }
642            i32::try_from(idx + 1).map_err(|_| InchiError::InvalidStructure {
643                reason: format!("atom index {idx} out of range"),
644            })
645        };
646
647        let mut alists = Vec::with_capacity(units_in.len());
648        let mut blists = Vec::with_capacity(units_in.len());
649        for unit in units_in {
650            let mut alist = Vec::with_capacity(unit.atoms.len());
651            for &a in &unit.atoms {
652                alist.push(one_based(a)?);
653            }
654            let mut blist = Vec::with_capacity(unit.crossing_bonds.len() * 2);
655            for &[a, b] in &unit.crossing_bonds {
656                blist.push(one_based(a)?);
657                blist.push(one_based(b)?);
658            }
659            alists.push(alist);
660            blists.push(blist);
661        }
662
663        // Allocate boxed so the struct's address (and thus every interior
664        // pointer the C side stores) is stable even if the caller moves us.
665        let mut me = Box::new(PolymerBacking {
666            alists,
667            blists,
668            units: Vec::with_capacity(units_in.len()),
669            unit_ptrs: Vec::with_capacity(units_in.len()),
670            polymer: unsafe { std::mem::zeroed() },
671        });
672
673        // Build the unit structs into a local Vec, taking stable buffer
674        // pointers from the already-populated `alists`/`blists` (which live in
675        // `me`). Using a local Vec sidesteps simultaneous borrows of `me`.
676        let mut built = Vec::with_capacity(units_in.len());
677        let lists = me.alists.iter().zip(me.blists.iter());
678        for (unit, (alist, blist)) in units_in.iter().zip(lists) {
679            let mut raw: inchi_sys::inchi_Input_PolymerUnit = unsafe { std::mem::zeroed() };
680            raw.id = unit.id;
681            raw.label = unit.label;
682            raw.type_ = unit.kind.code();
683            raw.subtype = unit.subtype.code();
684            raw.conn = unit.connection.code();
685            raw.na = i32::try_from(unit.atoms.len()).unwrap_or(0);
686            raw.nb = i32::try_from(unit.crossing_bonds.len()).unwrap_or(0);
687            write_subscript(&mut raw.smt, &unit.subscript);
688            // SAFETY: the inner buffers live as long as `me`; the C side never
689            // mutates them, so casting the const buffer pointer to `*mut` is sound.
690            raw.alist = if alist.is_empty() {
691                std::ptr::null_mut()
692            } else {
693                alist.as_ptr() as *mut std::os::raw::c_int
694            };
695            raw.blist = if blist.is_empty() {
696                std::ptr::null_mut()
697            } else {
698                blist.as_ptr() as *mut std::os::raw::c_int
699            };
700            built.push(raw);
701        }
702        me.units = built;
703
704        // Record the array of pointers into the now-stable `units`.
705        let mut ptrs = Vec::with_capacity(me.units.len());
706        for u in me.units.iter_mut() {
707            ptrs.push(u as *mut inchi_sys::inchi_Input_PolymerUnit);
708        }
709        me.unit_ptrs = ptrs;
710        me.polymer.n = i32::try_from(me.unit_ptrs.len()).unwrap_or(0);
711        me.polymer.units = me.unit_ptrs.as_mut_ptr();
712        Ok(me)
713    }
714
715    fn as_mut_ptr(&mut self) -> *mut inchi_sys::inchi_Input_Polymer {
716        &mut self.polymer
717    }
718}
719
/// Writes a polymer Sgroup subscript into the fixed-size `smt` field (80
/// bytes), truncating if necessary and always leaving it NUL-terminated.
///
/// At most 79 bytes of `subscript` are copied. If that limit falls inside a
/// multi-byte UTF-8 sequence, the cut moves back to the previous character
/// boundary so no partial character is written. Every byte after the copied
/// text is set to `0`, so the result does not depend on what `dst` held
/// before the call.
fn write_subscript(dst: &mut [std::os::raw::c_char; 80], subscript: &str) {
    // Reserve the last byte for the terminator.
    let capacity = dst.len().saturating_sub(1);
    let mut end = subscript.len().min(capacity);
    // Offset 0 is always a boundary, so this loop terminates.
    while !subscript.is_char_boundary(end) {
        end -= 1;
    }

    let bytes = &subscript.as_bytes()[..end];
    let (text, rest) = dst.split_at_mut(end);
    for (slot, &b) in text.iter_mut().zip(bytes) {
        *slot = b as std::os::raw::c_char;
    }
    // Terminator plus padding; also clears anything left from earlier use.
    rest.fill(0);
}
728
729/// Records `to` as a neighbor of `from` in the raw atom adjacency list.
730fn push_neighbor(
731    atoms: &mut [inchi_sys::inchi_Atom],
732    from: usize,
733    to: usize,
734    order: BondOrder,
735) -> Result<()> {
736    let to_idx = i16::try_from(to).map_err(|_| InchiError::InvalidStructure {
737        reason: format!("atom index {to} out of range"),
738    })?;
739    let atom = atoms
740        .get_mut(from)
741        .ok_or_else(|| InchiError::InvalidStructure {
742            reason: format!("bond references nonexistent atom {from}"),
743        })?;
744    let slot = atom.num_bonds as usize;
745    if slot >= MAX_BONDS_PER_ATOM {
746        return Err(InchiError::InvalidStructure {
747            reason: format!("atom {from} exceeds the maximum of {MAX_BONDS_PER_ATOM} bonds"),
748        });
749    }
750    if let (Some(nbr), Some(bt)) = (atom.neighbor.get_mut(slot), atom.bond_type.get_mut(slot)) {
751        *nbr = to_idx;
752        *bt = order.code();
753        atom.num_bonds += 1;
754        Ok(())
755    } else {
756        Err(InchiError::InvalidStructure {
757            reason: format!("atom {from} bond slot {slot} out of range"),
758        })
759    }
760}
761
762/// Writes an element symbol into the fixed-size `elname` field, validating it.
763fn write_elname(dst: &mut [std::os::raw::c_char; ELNAME_CAP], symbol: &str) -> Result<()> {
764    let bytes = symbol.as_bytes();
765    if bytes.is_empty() {
766        return Err(InchiError::InvalidStructure {
767            reason: "empty element symbol".to_string(),
768        });
769    }
770    if !symbol.is_ascii() {
771        return Err(InchiError::InvalidStructure {
772            reason: format!("element symbol {symbol:?} is not ASCII"),
773        });
774    }
775    if bytes.len() >= ELNAME_CAP {
776        return Err(InchiError::InvalidStructure {
777            reason: format!(
778                "element symbol {symbol:?} is too long (max {} chars)",
779                ELNAME_CAP - 1
780            ),
781        });
782    }
783    for (slot, &b) in dst.iter_mut().zip(bytes) {
784        *slot = b as std::os::raw::c_char;
785    }
786    Ok(())
787}