inchi/molecule.rs
1//! Programmatic structure input: build a [`Molecule`] from atoms, bonds, and
2//! 0D stereo, then generate its InChI via the native `GetINCHI` entry point.
3
4use crate::error::{InchiError, Result};
5use crate::options::Options;
6use crate::output::InchiOutput;
7
8/// Maximum number of atoms accepted by the InChI library (`MAX_ATOMS`).
9const MAX_ATOMS: usize = 1024;
10/// Maximum bonds recordable per atom in the FFI struct (`MAXVAL`).
11const MAX_BONDS_PER_ATOM: usize = inchi_sys::MAXVAL as usize;
12/// Capacity of the element-symbol field, including the trailing NUL.
13const ELNAME_CAP: usize = inchi_sys::ATOM_EL_LEN as usize;
14
/// Unpaired-electron (radical) state assigned to an atom.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum Radical {
    /// The atom is not a radical.
    #[default]
    None,
    /// Singlet state (carbene-like).
    Singlet,
    /// Doublet state: one unpaired electron.
    Doublet,
    /// Triplet state: two unpaired electrons.
    Triplet,
}
29
30impl Radical {
31 fn code(self) -> i8 {
32 let v = match self {
33 Radical::None => inchi_sys::INCHI_RADICAL_NONE,
34 Radical::Singlet => inchi_sys::INCHI_RADICAL_SINGLET,
35 Radical::Doublet => inchi_sys::INCHI_RADICAL_DOUBLET,
36 Radical::Triplet => inchi_sys::INCHI_RADICAL_TRIPLET,
37 };
38 v as i8
39 }
40}
41
/// Bond order of a covalent bond between two atoms.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum BondOrder {
    /// Single bond.
    #[default]
    Single,
    /// Double bond.
    Double,
    /// Triple bond.
    Triple,
    /// "Alternating" (aromatic) bond. The InChI documentation recommends
    /// spelling out explicit single/double bonds instead of using this.
    Alternating,
}
57
58impl BondOrder {
59 fn code(self) -> i8 {
60 let v = match self {
61 BondOrder::Single => inchi_sys::INCHI_BOND_TYPE_SINGLE,
62 BondOrder::Double => inchi_sys::INCHI_BOND_TYPE_DOUBLE,
63 BondOrder::Triple => inchi_sys::INCHI_BOND_TYPE_TRIPLE,
64 BondOrder::Alternating => inchi_sys::INCHI_BOND_TYPE_ALTERN,
65 };
66 v as i8
67 }
68}
69
/// Implicit-hydrogen policy for an atom.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ImplicitH {
    /// The library adds as many implicit hydrogens as normal valence requires
    /// (`num_iso_H[0] = -1`). This is the usual choice for a heavy-atom
    /// skeleton and mirrors molfile behavior.
    #[default]
    Auto,
    /// The atom carries exactly this many implicit (non-isotopic) hydrogens.
    Exactly(u8),
}
82
/// Parity of a 0D stereo element, used when no coordinates disambiguate the
/// geometry.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Parity {
    /// Odd parity (`'-'` / `'o'`).
    Odd,
    /// Even parity (`'+'` / `'e'`).
    Even,
    /// Stereogenic, but the configuration is unspecified (`'u'`).
    Unknown,
}
94
95impl Parity {
96 fn code(self) -> i8 {
97 let v = match self {
98 Parity::Odd => inchi_sys::INCHI_PARITY_ODD,
99 Parity::Even => inchi_sys::INCHI_PARITY_EVEN,
100 Parity::Unknown => inchi_sys::INCHI_PARITY_UNKNOWN,
101 };
102 v as i8
103 }
104}
105
106/// A single 0D stereo element, referencing atoms by their index in the
107/// [`Molecule`].
108///
109/// The neighbor ordering follows the InChI convention exactly; getting it wrong
110/// flips the parity, so validate against known identifiers. See the upstream
111/// `inchi_api.h` for the precise diagrams.
112#[derive(Debug, Clone, Copy, PartialEq, Eq)]
113#[non_exhaustive]
114pub enum Stereo {
115 /// A tetrahedral stereocenter. `neighbors` lists the four substituents in
116 /// the order whose handedness defines `parity`, seen from the first
117 /// neighbor toward `center`.
118 Tetrahedral {
119 /// Index of the central atom.
120 center: usize,
121 /// The four neighbor atom indices, in convention order.
122 neighbors: [usize; 4],
123 /// The parity of the configuration.
124 parity: Parity,
125 },
126 /// A stereogenic double bond `>A=B<` (or even-length cumulene). `ends` is
127 /// `[X, A, B, Y]` where `A=B` is the double bond and `X`, `Y` are the
128 /// reference substituents.
129 DoubleBond {
130 /// `[X, A, B, Y]` atom indices.
131 ends: [usize; 4],
132 /// The parity of the configuration.
133 parity: Parity,
134 },
135 /// An allene / odd-length cumulene stereocenter. `ends` is `[X, A, B, Y]`
136 /// and `center` is the central cumulene atom.
137 Allene {
138 /// Index of the central atom.
139 center: usize,
140 /// `[X, A, B, Y]` atom indices.
141 ends: [usize; 4],
142 /// The parity of the configuration.
143 parity: Parity,
144 },
145}
146
147/// A single atom in a [`Molecule`].
148///
149/// Construct with [`Atom::new`] and refine with the chainable setters.
150///
151/// ```
152/// use inchi::{Atom, Radical, ImplicitH};
153///
154/// let carbon = Atom::new("C").position(0.0, 0.0, 0.0);
155/// let chloride = Atom::new("Cl").charge(-1).implicit_hydrogens(ImplicitH::Exactly(0));
156/// let _ = (carbon, chloride, Radical::None);
157/// ```
158#[derive(Debug, Clone, PartialEq)]
159pub struct Atom {
160 element: String,
161 x: f64,
162 y: f64,
163 z: f64,
164 charge: i8,
165 isotope: Option<u16>,
166 radical: Radical,
167 implicit_h: ImplicitH,
168}
169
170impl Atom {
171 /// Creates an atom of the given element (e.g. `"C"`, `"Cl"`, `"Na"`).
172 ///
173 /// The symbol is validated when the molecule is converted; an empty,
174 /// non-ASCII, or over-long symbol yields [`InchiError::InvalidStructure`].
175 ///
176 /// ```
177 /// use inchi::Atom;
178 /// let _ = Atom::new("O");
179 /// ```
180 #[must_use]
181 pub fn new(element: impl Into<String>) -> Self {
182 Atom {
183 element: element.into(),
184 x: 0.0,
185 y: 0.0,
186 z: 0.0,
187 charge: 0,
188 isotope: None,
189 radical: Radical::None,
190 implicit_h: ImplicitH::Auto,
191 }
192 }
193
194 /// Sets the 3D coordinates of the atom (defaults to the origin).
195 ///
196 /// ```
197 /// use inchi::Atom;
198 /// let _ = Atom::new("C").position(1.0, 0.5, -0.25);
199 /// ```
200 #[must_use]
201 pub fn position(mut self, x: f64, y: f64, z: f64) -> Self {
202 self.x = x;
203 self.y = y;
204 self.z = z;
205 self
206 }
207
208 /// Sets the formal charge (defaults to `0`).
209 ///
210 /// ```
211 /// use inchi::Atom;
212 /// let _ = Atom::new("N").charge(1);
213 /// ```
214 #[must_use]
215 pub fn charge(mut self, charge: i8) -> Self {
216 self.charge = charge;
217 self
218 }
219
220 /// Sets the absolute isotopic mass (e.g. `13` for carbon-13). Omit for the
221 /// natural isotopic composition.
222 ///
223 /// ```
224 /// use inchi::Atom;
225 /// let _ = Atom::new("C").isotope(13);
226 /// ```
227 #[must_use]
228 pub fn isotope(mut self, mass: u16) -> Self {
229 self.isotope = Some(mass);
230 self
231 }
232
233 /// Sets the radical state (defaults to [`Radical::None`]).
234 ///
235 /// ```
236 /// use inchi::{Atom, Radical};
237 /// let _ = Atom::new("C").radical(Radical::Triplet);
238 /// ```
239 #[must_use]
240 pub fn radical(mut self, radical: Radical) -> Self {
241 self.radical = radical;
242 self
243 }
244
245 /// Sets how implicit hydrogens are handled (defaults to [`ImplicitH::Auto`]).
246 ///
247 /// ```
248 /// use inchi::{Atom, ImplicitH};
249 /// let _ = Atom::new("C").implicit_hydrogens(ImplicitH::Exactly(3));
250 /// ```
251 #[must_use]
252 pub fn implicit_hydrogens(mut self, h: ImplicitH) -> Self {
253 self.implicit_h = h;
254 self
255 }
256}
257
258/// A molecular structure assembled programmatically from atoms, bonds, and 0D
259/// stereo descriptors.
260///
261/// Atoms are referenced by the index returned from [`Molecule::add_atom`] (also
262/// the order in which they are added, starting at `0`).
263///
264/// ```
265/// use inchi::{Molecule, Atom, BondOrder};
266///
267/// // Ethanol: C-C-O (implicit hydrogens added automatically).
268/// let mut mol = Molecule::new();
269/// let c1 = mol.add_atom(Atom::new("C"));
270/// let c2 = mol.add_atom(Atom::new("C"));
271/// let o = mol.add_atom(Atom::new("O"));
272/// mol.add_bond(c1, c2, BondOrder::Single)?;
273/// mol.add_bond(c2, o, BondOrder::Single)?;
274///
275/// let out = mol.to_inchi(())?;
276/// assert_eq!(out.inchi(), "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3");
277/// # Ok::<(), inchi::InchiError>(())
278/// ```
279#[derive(Debug, Clone, Default, PartialEq)]
280pub struct Molecule {
281 atoms: Vec<Atom>,
282 bonds: Vec<(usize, usize, BondOrder)>,
283 stereo: Vec<Stereo>,
284 polymer_units: Vec<crate::polymer::PolymerUnit>,
285}
286
287impl Molecule {
288 /// Creates an empty molecule.
289 ///
290 /// ```
291 /// use inchi::Molecule;
292 /// let mol = Molecule::new();
293 /// assert_eq!(mol.atom_count(), 0);
294 /// ```
295 #[must_use]
296 pub fn new() -> Self {
297 Molecule::default()
298 }
299
300 /// Adds an atom and returns its index.
301 ///
302 /// ```
303 /// use inchi::{Molecule, Atom};
304 /// let mut mol = Molecule::new();
305 /// assert_eq!(mol.add_atom(Atom::new("C")), 0);
306 /// assert_eq!(mol.add_atom(Atom::new("O")), 1);
307 /// ```
308 pub fn add_atom(&mut self, atom: Atom) -> usize {
309 self.atoms.push(atom);
310 self.atoms.len() - 1
311 }
312
313 /// Adds a bond between two existing atoms.
314 ///
315 /// Returns [`InchiError::InvalidStructure`] if either index is out of range
316 /// or if `a == b`.
317 ///
318 /// ```
319 /// use inchi::{Molecule, Atom, BondOrder};
320 /// let mut mol = Molecule::new();
321 /// let a = mol.add_atom(Atom::new("C"));
322 /// let b = mol.add_atom(Atom::new("O"));
323 /// mol.add_bond(a, b, BondOrder::Double)?;
324 /// assert!(mol.add_bond(a, 99, BondOrder::Single).is_err());
325 /// # Ok::<(), inchi::InchiError>(())
326 /// ```
327 pub fn add_bond(&mut self, a: usize, b: usize, order: BondOrder) -> Result<()> {
328 if a == b {
329 return Err(InchiError::InvalidStructure {
330 reason: format!("bond connects atom {a} to itself"),
331 });
332 }
333 let n = self.atoms.len();
334 if a >= n || b >= n {
335 return Err(InchiError::InvalidStructure {
336 reason: format!("bond ({a}, {b}) references a nonexistent atom (have {n})"),
337 });
338 }
339 self.bonds.push((a, b, order));
340 Ok(())
341 }
342
343 /// Adds a 0D stereo descriptor. Indices are validated at conversion time.
344 ///
345 /// ```
346 /// use inchi::{Molecule, Atom, Stereo, Parity};
347 /// let mut mol = Molecule::new();
348 /// for el in ["C", "C", "N", "O"] { mol.add_atom(Atom::new(el)); }
349 /// mol.add_stereo(Stereo::Tetrahedral { center: 0, neighbors: [1, 2, 3, 0], parity: Parity::Odd });
350 /// assert_eq!(mol.stereo_count(), 1);
351 /// ```
352 pub fn add_stereo(&mut self, stereo: Stereo) {
353 self.stereo.push(stereo);
354 }
355
356 /// Adds a polymer structural repeating unit, switching InChI generation to
357 /// the extended `GetINCHIEx` entry point.
358 ///
359 /// Polymers require the [`Options::polymers`](crate::Options::polymers)
360 /// flag to be set and yield a non-standard, beta-flagged InChI. The unit's
361 /// atom indices refer to atoms already added to this molecule; the two
362 /// chain ends are normally capped with `"Zz"` star atoms.
363 ///
364 /// ```
365 /// use inchi::{Molecule, Atom, BondOrder, Options, Polymers, PolymerUnit};
366 /// // A polyethylene repeat unit: *-CH2-CH2-*
367 /// let mut mol = Molecule::new();
368 /// let s1 = mol.add_atom(Atom::new("Zz"));
369 /// let c1 = mol.add_atom(Atom::new("C"));
370 /// let c2 = mol.add_atom(Atom::new("C"));
371 /// let s2 = mol.add_atom(Atom::new("Zz"));
372 /// mol.add_bond(s1, c1, BondOrder::Single)?;
373 /// mol.add_bond(c1, c2, BondOrder::Single)?;
374 /// mol.add_bond(c2, s2, BondOrder::Single)?;
375 /// mol.add_polymer_unit(PolymerUnit::sru([c1, c2], [[s1, c1], [c2, s2]]));
376 /// let inchi = mol.to_inchi(Options::new().polymers(Polymers::On))?.into_inchi();
377 /// assert!(inchi.contains("/z"));
378 /// # Ok::<(), inchi::InchiError>(())
379 /// ```
380 pub fn add_polymer_unit(&mut self, unit: crate::polymer::PolymerUnit) {
381 self.polymer_units.push(unit);
382 }
383
384 /// The number of atoms.
385 ///
386 /// ```
387 /// # use inchi::{Molecule, Atom};
388 /// let mut mol = Molecule::new();
389 /// mol.add_atom(Atom::new("C"));
390 /// assert_eq!(mol.atom_count(), 1);
391 /// ```
392 #[must_use]
393 pub fn atom_count(&self) -> usize {
394 self.atoms.len()
395 }
396
397 /// The number of bonds.
398 ///
399 /// ```
400 /// # use inchi::{Molecule, Atom, BondOrder};
401 /// let mut mol = Molecule::new();
402 /// let a = mol.add_atom(Atom::new("C"));
403 /// let b = mol.add_atom(Atom::new("C"));
404 /// mol.add_bond(a, b, BondOrder::Single)?;
405 /// assert_eq!(mol.bond_count(), 1);
406 /// # Ok::<(), inchi::InchiError>(())
407 /// ```
408 #[must_use]
409 pub fn bond_count(&self) -> usize {
410 self.bonds.len()
411 }
412
413 /// The number of 0D stereo descriptors.
414 ///
415 /// ```
416 /// # use inchi::Molecule;
417 /// assert_eq!(Molecule::new().stereo_count(), 0);
418 /// ```
419 #[must_use]
420 pub fn stereo_count(&self) -> usize {
421 self.stereo.len()
422 }
423
424 /// Generates the InChI for this molecule using the given [`Options`].
425 ///
426 /// ```
427 /// use inchi::{Molecule, Atom};
428 /// // A lone oxygen atom becomes water once implicit H are added.
429 /// let mut mol = Molecule::new();
430 /// mol.add_atom(Atom::new("O"));
431 /// assert_eq!(mol.to_inchi(())?.inchi(), "InChI=1S/H2O/h1H2");
432 /// # Ok::<(), inchi::InchiError>(())
433 /// ```
434 pub fn to_inchi(&self, options: impl Into<Options>) -> Result<InchiOutput> {
435 let options = options.into();
436 let mut atoms = self.build_atoms()?;
437 let mut stereo = self.build_stereo()?;
438 let opts = crate::raw::to_cstring(&options.to_arg_string())?;
439
440 let num_atoms = i16::try_from(atoms.len()).map_err(|_| InchiError::InvalidStructure {
441 reason: format!("too many atoms ({})", atoms.len()),
442 })?;
443 let num_stereo = i16::try_from(stereo.len()).map_err(|_| InchiError::InvalidStructure {
444 reason: format!("too many stereo elements ({})", stereo.len()),
445 })?;
446
447 // SAFETY: `input` borrows the `atoms`/`stereo`/`opts` allocations, all
448 // of which outlive the `GetINCHI` call below. `GetINCHI` does not take
449 // ownership of the input (the caller owns it), and we serialize via the
450 // global lock. The output is owned by an `OutputGuard` that frees it.
451 let mut input: inchi_sys::inchi_Input = unsafe { std::mem::zeroed() };
452 input.atom = atoms.as_mut_ptr();
453 input.num_atoms = num_atoms;
454 input.stereo0D = if stereo.is_empty() {
455 std::ptr::null_mut()
456 } else {
457 stereo.as_mut_ptr()
458 };
459 input.num_stereo0D = num_stereo;
460 input.szOptions = opts.as_ptr() as *mut std::os::raw::c_char;
461
462 if self.polymer_units.is_empty() {
463 let _guard = crate::raw::lock();
464 let mut out = crate::raw::OutputGuard::new();
465 let rc = unsafe { inchi_sys::GetINCHI(&mut input, out.as_mut_ptr()) };
466 // Keep the input-backing allocations alive until after the FFI call.
467 drop(atoms);
468 drop(stereo);
469 drop(opts);
470 return crate::build_output(rc, &out);
471 }
472
473 // Polymer input requires the extended `GetINCHIEx` entry point. Build
474 // the polymer block, keeping every backing allocation alive across the
475 // call. `inchi_InputEx` shares its first fields with `inchi_Input`.
476 let mut backing = PolymerBacking::build(&self.polymer_units, self.atoms.len())?;
477 let mut input_ex: inchi_sys::inchi_InputEx = unsafe { std::mem::zeroed() };
478 input_ex.atom = atoms.as_mut_ptr();
479 input_ex.num_atoms = num_atoms;
480 input_ex.stereo0D = if stereo.is_empty() {
481 std::ptr::null_mut()
482 } else {
483 stereo.as_mut_ptr()
484 };
485 input_ex.num_stereo0D = num_stereo;
486 input_ex.szOptions = opts.as_ptr() as *mut std::os::raw::c_char;
487 input_ex.polymer = backing.as_mut_ptr();
488
489 let _guard = crate::raw::lock();
490 let mut out = crate::raw::OutputGuard::new();
491 let rc = unsafe { inchi_sys::GetINCHIEx(&mut input_ex, out.as_mut_ptr()) };
492 // Keep all input-backing allocations alive until after the FFI call.
493 drop(atoms);
494 drop(stereo);
495 drop(opts);
496 drop(backing);
497 crate::build_output(rc, &out)
498 }
499
500 fn build_atoms(&self) -> Result<Vec<inchi_sys::inchi_Atom>> {
501 if self.atoms.is_empty() {
502 return Err(InchiError::InvalidStructure {
503 reason: "molecule has no atoms".to_string(),
504 });
505 }
506 if self.atoms.len() > MAX_ATOMS {
507 return Err(InchiError::InvalidStructure {
508 reason: format!("too many atoms ({} > {MAX_ATOMS})", self.atoms.len()),
509 });
510 }
511
512 let mut raw: Vec<inchi_sys::inchi_Atom> = Vec::with_capacity(self.atoms.len());
513 for atom in &self.atoms {
514 let mut a: inchi_sys::inchi_Atom = unsafe { std::mem::zeroed() };
515 a.x = atom.x;
516 a.y = atom.y;
517 a.z = atom.z;
518 write_elname(&mut a.elname, &atom.element)?;
519 a.charge = atom.charge;
520 a.radical = atom.radical.code();
521 if let Some(mass) = atom.isotope {
522 a.isotopic_mass =
523 i16::try_from(mass).map_err(|_| InchiError::InvalidStructure {
524 reason: format!("isotopic mass {mass} out of range"),
525 })?;
526 }
527 a.num_iso_H = match atom.implicit_h {
528 ImplicitH::Auto => [-1, 0, 0, 0],
529 ImplicitH::Exactly(n) => [
530 i8::try_from(n).map_err(|_| InchiError::InvalidStructure {
531 reason: format!("implicit H count {n} out of range"),
532 })?,
533 0,
534 0,
535 0,
536 ],
537 };
538 raw.push(a);
539 }
540
541 // Populate the symmetric adjacency lists from the bond list. Each bond
542 // is recorded in both endpoints, as a molfile-derived input would be.
543 for &(a, b, order) in &self.bonds {
544 push_neighbor(&mut raw, a, b, order)?;
545 push_neighbor(&mut raw, b, a, order)?;
546 }
547
548 Ok(raw)
549 }
550
551 fn build_stereo(&self) -> Result<Vec<inchi_sys::inchi_Stereo0D>> {
552 let n = self.atoms.len();
553 let check = |idx: usize| -> Result<i16> {
554 if idx >= n {
555 return Err(InchiError::InvalidStructure {
556 reason: format!("stereo references nonexistent atom {idx} (have {n})"),
557 });
558 }
559 i16::try_from(idx).map_err(|_| InchiError::InvalidStructure {
560 reason: format!("atom index {idx} out of range"),
561 })
562 };
563
564 let mut raw = Vec::with_capacity(self.stereo.len());
565 for stereo in &self.stereo {
566 let mut s: inchi_sys::inchi_Stereo0D = unsafe { std::mem::zeroed() };
567 match *stereo {
568 Stereo::Tetrahedral {
569 center,
570 neighbors,
571 parity,
572 } => {
573 s.central_atom = check(center)?;
574 s.neighbor = [
575 check(neighbors[0])?,
576 check(neighbors[1])?,
577 check(neighbors[2])?,
578 check(neighbors[3])?,
579 ];
580 s.type_ = inchi_sys::INCHI_StereoType_Tetrahedral as i8;
581 s.parity = parity.code();
582 }
583 Stereo::DoubleBond { ends, parity } => {
584 s.central_atom = inchi_sys::NO_ATOM as i16;
585 s.neighbor = [
586 check(ends[0])?,
587 check(ends[1])?,
588 check(ends[2])?,
589 check(ends[3])?,
590 ];
591 s.type_ = inchi_sys::INCHI_StereoType_DoubleBond as i8;
592 s.parity = parity.code();
593 }
594 Stereo::Allene {
595 center,
596 ends,
597 parity,
598 } => {
599 s.central_atom = check(center)?;
600 s.neighbor = [
601 check(ends[0])?,
602 check(ends[1])?,
603 check(ends[2])?,
604 check(ends[3])?,
605 ];
606 s.type_ = inchi_sys::INCHI_StereoType_Allene as i8;
607 s.parity = parity.code();
608 }
609 }
610 raw.push(s);
611 }
612 Ok(raw)
613 }
614}
615
616/// Owns every heap allocation behind an [`inchi_sys::inchi_Input_Polymer`] so
617/// the C side sees stable pointers for the whole `GetINCHIEx` call.
618///
619/// Field order matters only for clarity; all pointers are taken after the
620/// backing vectors are fully populated, and the inner buffers stay put for the
621/// lifetime of the value.
622struct PolymerBacking {
623 // Per-unit 1-based atom lists (SAL) and crossing-bond lists (SBL).
624 alists: Vec<Vec<std::os::raw::c_int>>,
625 blists: Vec<Vec<std::os::raw::c_int>>,
626 // The unit structs and the array of pointers to them.
627 units: Vec<inchi_sys::inchi_Input_PolymerUnit>,
628 unit_ptrs: Vec<*mut inchi_sys::inchi_Input_PolymerUnit>,
629 polymer: inchi_sys::inchi_Input_Polymer,
630}
631
632impl PolymerBacking {
633 fn build(units_in: &[crate::polymer::PolymerUnit], num_atoms: usize) -> Result<Box<Self>> {
634 let one_based = |idx: usize| -> Result<std::os::raw::c_int> {
635 if idx >= num_atoms {
636 return Err(InchiError::InvalidStructure {
637 reason: format!(
638 "polymer unit references nonexistent atom {idx} (have {num_atoms})"
639 ),
640 });
641 }
642 i32::try_from(idx + 1).map_err(|_| InchiError::InvalidStructure {
643 reason: format!("atom index {idx} out of range"),
644 })
645 };
646
647 let mut alists = Vec::with_capacity(units_in.len());
648 let mut blists = Vec::with_capacity(units_in.len());
649 for unit in units_in {
650 let mut alist = Vec::with_capacity(unit.atoms.len());
651 for &a in &unit.atoms {
652 alist.push(one_based(a)?);
653 }
654 let mut blist = Vec::with_capacity(unit.crossing_bonds.len() * 2);
655 for &[a, b] in &unit.crossing_bonds {
656 blist.push(one_based(a)?);
657 blist.push(one_based(b)?);
658 }
659 alists.push(alist);
660 blists.push(blist);
661 }
662
663 // Allocate boxed so the struct's address (and thus every interior
664 // pointer the C side stores) is stable even if the caller moves us.
665 let mut me = Box::new(PolymerBacking {
666 alists,
667 blists,
668 units: Vec::with_capacity(units_in.len()),
669 unit_ptrs: Vec::with_capacity(units_in.len()),
670 polymer: unsafe { std::mem::zeroed() },
671 });
672
673 // Build the unit structs into a local Vec, taking stable buffer
674 // pointers from the already-populated `alists`/`blists` (which live in
675 // `me`). Using a local Vec sidesteps simultaneous borrows of `me`.
676 let mut built = Vec::with_capacity(units_in.len());
677 let lists = me.alists.iter().zip(me.blists.iter());
678 for (unit, (alist, blist)) in units_in.iter().zip(lists) {
679 let mut raw: inchi_sys::inchi_Input_PolymerUnit = unsafe { std::mem::zeroed() };
680 raw.id = unit.id;
681 raw.label = unit.label;
682 raw.type_ = unit.kind.code();
683 raw.subtype = unit.subtype.code();
684 raw.conn = unit.connection.code();
685 raw.na = i32::try_from(unit.atoms.len()).unwrap_or(0);
686 raw.nb = i32::try_from(unit.crossing_bonds.len()).unwrap_or(0);
687 write_subscript(&mut raw.smt, &unit.subscript);
688 // SAFETY: the inner buffers live as long as `me`; the C side never
689 // mutates them, so casting the const buffer pointer to `*mut` is sound.
690 raw.alist = if alist.is_empty() {
691 std::ptr::null_mut()
692 } else {
693 alist.as_ptr() as *mut std::os::raw::c_int
694 };
695 raw.blist = if blist.is_empty() {
696 std::ptr::null_mut()
697 } else {
698 blist.as_ptr() as *mut std::os::raw::c_int
699 };
700 built.push(raw);
701 }
702 me.units = built;
703
704 // Record the array of pointers into the now-stable `units`.
705 let mut ptrs = Vec::with_capacity(me.units.len());
706 for u in me.units.iter_mut() {
707 ptrs.push(u as *mut inchi_sys::inchi_Input_PolymerUnit);
708 }
709 me.unit_ptrs = ptrs;
710 me.polymer.n = i32::try_from(me.unit_ptrs.len()).unwrap_or(0);
711 me.polymer.units = me.unit_ptrs.as_mut_ptr();
712 Ok(me)
713 }
714
715 fn as_mut_ptr(&mut self) -> *mut inchi_sys::inchi_Input_Polymer {
716 &mut self.polymer
717 }
718}
719
/// Writes a polymer Sgroup subscript into the fixed-size `smt` field (80 bytes,
/// NUL-terminated), truncating if necessary.
///
/// At most 79 bytes of `subscript` are copied. Every remaining byte of `dst`,
/// including the last one, is set to NUL, so the field is a valid C string
/// whatever it held before the call.
fn write_subscript(dst: &mut [std::os::raw::c_char; 80], subscript: &str) {
    let max = dst.len().saturating_sub(1);
    let src = subscript.as_bytes();
    let len = src.len().min(max);

    let (text, rest) = dst.split_at_mut(len);
    for (slot, &b) in text.iter_mut().zip(src) {
        *slot = b as std::os::raw::c_char;
    }
    // Do not rely on the caller having zeroed the buffer: clear the tail so
    // the terminator is always present and no stale bytes leak through.
    rest.fill(0);
}
728
729/// Records `to` as a neighbor of `from` in the raw atom adjacency list.
730fn push_neighbor(
731 atoms: &mut [inchi_sys::inchi_Atom],
732 from: usize,
733 to: usize,
734 order: BondOrder,
735) -> Result<()> {
736 let to_idx = i16::try_from(to).map_err(|_| InchiError::InvalidStructure {
737 reason: format!("atom index {to} out of range"),
738 })?;
739 let atom = atoms
740 .get_mut(from)
741 .ok_or_else(|| InchiError::InvalidStructure {
742 reason: format!("bond references nonexistent atom {from}"),
743 })?;
744 let slot = atom.num_bonds as usize;
745 if slot >= MAX_BONDS_PER_ATOM {
746 return Err(InchiError::InvalidStructure {
747 reason: format!("atom {from} exceeds the maximum of {MAX_BONDS_PER_ATOM} bonds"),
748 });
749 }
750 if let (Some(nbr), Some(bt)) = (atom.neighbor.get_mut(slot), atom.bond_type.get_mut(slot)) {
751 *nbr = to_idx;
752 *bt = order.code();
753 atom.num_bonds += 1;
754 Ok(())
755 } else {
756 Err(InchiError::InvalidStructure {
757 reason: format!("atom {from} bond slot {slot} out of range"),
758 })
759 }
760}
761
762/// Writes an element symbol into the fixed-size `elname` field, validating it.
763fn write_elname(dst: &mut [std::os::raw::c_char; ELNAME_CAP], symbol: &str) -> Result<()> {
764 let bytes = symbol.as_bytes();
765 if bytes.is_empty() {
766 return Err(InchiError::InvalidStructure {
767 reason: "empty element symbol".to_string(),
768 });
769 }
770 if !symbol.is_ascii() {
771 return Err(InchiError::InvalidStructure {
772 reason: format!("element symbol {symbol:?} is not ASCII"),
773 });
774 }
775 if bytes.len() >= ELNAME_CAP {
776 return Err(InchiError::InvalidStructure {
777 reason: format!(
778 "element symbol {symbol:?} is too long (max {} chars)",
779 ELNAME_CAP - 1
780 ),
781 });
782 }
783 for (slot, &b) in dst.iter_mut().zip(bytes) {
784 *slot = b as std::os::raw::c_char;
785 }
786 Ok(())
787}