1use super::cell::{CellParameters, UnitCell};
9
10#[derive(Debug, Clone)]
12pub struct CifStructure {
13 pub data_block: String,
15 pub cell: UnitCell,
17 pub cell_params: CellParameters,
19 pub space_group_hm: Option<String>,
21 pub space_group_number: Option<u16>,
23 pub atom_sites: Vec<CifAtomSite>,
25}
26
27#[derive(Debug, Clone)]
29pub struct CifAtomSite {
30 pub label: String,
32 pub type_symbol: String,
34 pub atomic_number: u8,
36 pub frac_x: f64,
38 pub frac_y: f64,
39 pub frac_z: f64,
40 pub occupancy: f64,
42}
43
44pub fn parse_cif(input: &str) -> Result<CifStructure, String> {
49 let mut data_block = String::new();
50 let mut a = None;
51 let mut b = None;
52 let mut c = None;
53 let mut alpha = None;
54 let mut beta = None;
55 let mut gamma = None;
56 let mut sg_hm: Option<String> = None;
57 let mut sg_number: Option<u16> = None;
58 let mut atom_sites = Vec::new();
59
60 let lines: Vec<&str> = input.lines().collect();
61 let mut i = 0;
62
63 while i < lines.len() {
64 let line = lines[i].trim();
65
66 if line.is_empty() || line.starts_with('#') {
68 i += 1;
69 continue;
70 }
71
72 if let Some(name) = line.strip_prefix("data_") {
74 data_block = name.trim().to_string();
75 i += 1;
76 continue;
77 }
78
79 if line.starts_with("_cell_length_a") {
81 a = parse_cif_float(extract_value(line, "_cell_length_a"));
82 i += 1;
83 continue;
84 }
85 if line.starts_with("_cell_length_b") {
86 b = parse_cif_float(extract_value(line, "_cell_length_b"));
87 i += 1;
88 continue;
89 }
90 if line.starts_with("_cell_length_c") {
91 c = parse_cif_float(extract_value(line, "_cell_length_c"));
92 i += 1;
93 continue;
94 }
95 if line.starts_with("_cell_angle_alpha") {
96 alpha = parse_cif_float(extract_value(line, "_cell_angle_alpha"));
97 i += 1;
98 continue;
99 }
100 if line.starts_with("_cell_angle_beta") {
101 beta = parse_cif_float(extract_value(line, "_cell_angle_beta"));
102 i += 1;
103 continue;
104 }
105 if line.starts_with("_cell_angle_gamma") {
106 gamma = parse_cif_float(extract_value(line, "_cell_angle_gamma"));
107 i += 1;
108 continue;
109 }
110
111 if line.starts_with("_symmetry_space_group_name_H-M")
113 || line.starts_with("_space_group_name_H-M_alt")
114 {
115 let val = extract_value_quoted(line);
116 if !val.is_empty() {
117 sg_hm = Some(val);
118 }
119 i += 1;
120 continue;
121 }
122 if line.starts_with("_symmetry_Int_Tables_number")
123 || line.starts_with("_space_group_IT_number")
124 {
125 let val = extract_value_simple(line);
126 sg_number = val.parse::<u16>().ok();
127 i += 1;
128 continue;
129 }
130
131 if line == "loop_" {
133 let mut col_names = Vec::new();
135 let mut j = i + 1;
136 while j < lines.len() {
137 let col_line = lines[j].trim();
138 if col_line.starts_with("_atom_site_") {
139 col_names.push(col_line.to_string());
140 j += 1;
141 } else {
142 break;
143 }
144 }
145
146 if !col_names.is_empty() {
147 let label_col = col_names.iter().position(|c| c == "_atom_site_label");
149 let type_col = col_names.iter().position(|c| c == "_atom_site_type_symbol");
150 let fx_col = col_names.iter().position(|c| c == "_atom_site_fract_x");
151 let fy_col = col_names.iter().position(|c| c == "_atom_site_fract_y");
152 let fz_col = col_names.iter().position(|c| c == "_atom_site_fract_z");
153 let occ_col = col_names.iter().position(|c| c == "_atom_site_occupancy");
154 let cx_col = col_names.iter().position(|c| c == "_atom_site_Cartn_x");
155 let cy_col = col_names.iter().position(|c| c == "_atom_site_Cartn_y");
156 let cz_col = col_names.iter().position(|c| c == "_atom_site_Cartn_z");
157
158 let has_frac = fx_col.is_some() && fy_col.is_some() && fz_col.is_some();
159 let has_cart = cx_col.is_some() && cy_col.is_some() && cz_col.is_some();
160
161 while j < lines.len() {
163 let data_line = lines[j].trim();
164 if data_line.is_empty()
165 || data_line.starts_with('#')
166 || data_line.starts_with('_')
167 || data_line == "loop_"
168 {
169 break;
170 }
171
172 let fields: Vec<&str> = split_cif_fields(data_line);
173 if fields.len() < col_names.len() {
174 j += 1;
175 continue;
176 }
177
178 let label = label_col.map(|c| fields[c].to_string()).unwrap_or_default();
179 let type_sym = type_col
180 .map(|c| fields[c].to_string())
181 .or_else(|| {
182 Some(extract_element_from_label(&label))
184 })
185 .unwrap_or_default();
186
187 let (fx, fy, fz) = if has_frac {
188 (
189 parse_cif_float(fields[fx_col.unwrap()]).unwrap_or(0.0),
190 parse_cif_float(fields[fy_col.unwrap()]).unwrap_or(0.0),
191 parse_cif_float(fields[fz_col.unwrap()]).unwrap_or(0.0),
192 )
193 } else if has_cart {
194 (
196 parse_cif_float(fields[cx_col.unwrap()]).unwrap_or(0.0),
197 parse_cif_float(fields[cy_col.unwrap()]).unwrap_or(0.0),
198 parse_cif_float(fields[cz_col.unwrap()]).unwrap_or(0.0),
199 )
200 } else {
201 (0.0, 0.0, 0.0)
202 };
203
204 let occ = occ_col
205 .and_then(|c| parse_cif_float(fields[c]))
206 .unwrap_or(1.0);
207
208 let z = symbol_to_atomic_number(&type_sym);
209
210 atom_sites.push((
211 CifAtomSite {
212 label,
213 type_symbol: type_sym,
214 atomic_number: z,
215 frac_x: fx,
216 frac_y: fy,
217 frac_z: fz,
218 occupancy: occ,
219 },
220 has_cart && !has_frac,
221 ));
222
223 j += 1;
224 }
225
226 i = j;
227 continue;
228 }
229
230 i += 1;
231 continue;
232 }
233
234 i += 1;
235 }
236
237 let a = a.ok_or("Missing _cell_length_a")?;
239 let b = b.ok_or("Missing _cell_length_b")?;
240 let c = c.ok_or("Missing _cell_length_c")?;
241 let alpha = alpha.unwrap_or(90.0);
242 let beta = beta.unwrap_or(90.0);
243 let gamma = gamma.unwrap_or(90.0);
244
245 let cell_params = CellParameters {
246 a,
247 b,
248 c,
249 alpha,
250 beta,
251 gamma,
252 };
253 let cell = UnitCell::from_parameters(&cell_params);
254
255 let atom_sites: Vec<CifAtomSite> = atom_sites
257 .into_iter()
258 .map(|(mut site, is_cart)| {
259 if is_cart {
260 let frac = cell.cart_to_frac([site.frac_x, site.frac_y, site.frac_z]);
261 site.frac_x = frac[0];
262 site.frac_y = frac[1];
263 site.frac_z = frac[2];
264 }
265 site
266 })
267 .collect();
268
269 Ok(CifStructure {
270 data_block,
271 cell,
272 cell_params,
273 space_group_hm: sg_hm,
274 space_group_number: sg_number,
275 atom_sites,
276 })
277}
278
279pub fn write_cif(structure: &CifStructure) -> String {
281 let mut out = String::new();
282
283 out.push_str(&format!("data_{}\n", structure.data_block));
284 out.push_str("#\n");
285
286 let p = &structure.cell_params;
288 out.push_str(&format!("_cell_length_a {:.6}\n", p.a));
289 out.push_str(&format!("_cell_length_b {:.6}\n", p.b));
290 out.push_str(&format!("_cell_length_c {:.6}\n", p.c));
291 out.push_str(&format!(
292 "_cell_angle_alpha {:.4}\n",
293 p.alpha
294 ));
295 out.push_str(&format!(
296 "_cell_angle_beta {:.4}\n",
297 p.beta
298 ));
299 out.push_str(&format!(
300 "_cell_angle_gamma {:.4}\n",
301 p.gamma
302 ));
303 out.push_str("#\n");
304
305 if let Some(ref hm) = structure.space_group_hm {
307 out.push_str(&format!("_symmetry_space_group_name_H-M '{}'\n", hm));
308 }
309 if let Some(num) = structure.space_group_number {
310 out.push_str(&format!("_symmetry_Int_Tables_number {}\n", num));
311 }
312 out.push_str("#\n");
313
314 if !structure.atom_sites.is_empty() {
316 out.push_str("loop_\n");
317 out.push_str("_atom_site_label\n");
318 out.push_str("_atom_site_type_symbol\n");
319 out.push_str("_atom_site_fract_x\n");
320 out.push_str("_atom_site_fract_y\n");
321 out.push_str("_atom_site_fract_z\n");
322 out.push_str("_atom_site_occupancy\n");
323 for site in &structure.atom_sites {
324 out.push_str(&format!(
325 "{:<8} {:<4} {:.6} {:.6} {:.6} {:.4}\n",
326 site.label, site.type_symbol, site.frac_x, site.frac_y, site.frac_z, site.occupancy
327 ));
328 }
329 }
330
331 out.push_str("#END\n");
332 out
333}
334
335fn extract_value<'a>(line: &'a str, tag: &str) -> &'a str {
339 line[tag.len()..].trim()
340}
341
342fn extract_value_simple(line: &str) -> &str {
344 if let Some(pos) = line.find(char::is_whitespace) {
346 line[pos..].trim()
347 } else {
348 ""
349 }
350}
351
352fn extract_value_quoted(line: &str) -> String {
354 let val = extract_value_simple(line);
355 val.trim_matches('\'').trim_matches('"').trim().to_string()
356}
357
358fn parse_cif_float(s: &str) -> Option<f64> {
360 let s = s.trim();
361 if s == "?" || s == "." {
362 return None;
363 }
364 let clean = if let Some(paren) = s.find('(') {
366 &s[..paren]
367 } else {
368 s
369 };
370 clean.parse::<f64>().ok()
371}
372
373fn split_cif_fields(line: &str) -> Vec<&str> {
375 let mut fields = Vec::new();
376 let bytes = line.as_bytes();
377 let mut i = 0;
378 let len = bytes.len();
379
380 while i < len {
381 while i < len && bytes[i] == b' ' {
383 i += 1;
384 }
385 if i >= len {
386 break;
387 }
388
389 if bytes[i] == b'\'' {
390 let start = i + 1;
392 i = start;
393 while i < len && bytes[i] != b'\'' {
394 i += 1;
395 }
396 fields.push(&line[start..i]);
397 if i < len {
398 i += 1; }
400 } else {
401 let start = i;
403 while i < len && bytes[i] != b' ' {
404 i += 1;
405 }
406 fields.push(&line[start..i]);
407 }
408 }
409
410 fields
411}
412
413fn extract_element_from_label(label: &str) -> String {
415 let mut sym = String::new();
416 let mut chars = label.chars();
417 if let Some(c) = chars.next() {
418 if c.is_ascii_uppercase() {
419 sym.push(c);
420 if let Some(c2) = chars.next() {
421 if c2.is_ascii_lowercase() {
422 sym.push(c2);
423 }
424 }
425 }
426 }
427 if sym.is_empty() {
428 "X".to_string()
429 } else {
430 sym
431 }
432}
433
434fn symbol_to_atomic_number(sym: &str) -> u8 {
436 match sym.trim() {
437 "H" => 1,
438 "He" => 2,
439 "Li" => 3,
440 "Be" => 4,
441 "B" => 5,
442 "C" => 6,
443 "N" => 7,
444 "O" => 8,
445 "F" => 9,
446 "Ne" => 10,
447 "Na" => 11,
448 "Mg" => 12,
449 "Al" => 13,
450 "Si" => 14,
451 "P" => 15,
452 "S" => 16,
453 "Cl" => 17,
454 "Ar" => 18,
455 "K" => 19,
456 "Ca" => 20,
457 "Sc" => 21,
458 "Ti" => 22,
459 "V" => 23,
460 "Cr" => 24,
461 "Mn" => 25,
462 "Fe" => 26,
463 "Co" => 27,
464 "Ni" => 28,
465 "Cu" => 29,
466 "Zn" => 30,
467 "Ga" => 31,
468 "Ge" => 32,
469 "As" => 33,
470 "Se" => 34,
471 "Br" => 35,
472 "Kr" => 36,
473 "Rb" => 37,
474 "Sr" => 38,
475 "Y" => 39,
476 "Zr" => 40,
477 "Nb" => 41,
478 "Mo" => 42,
479 "Tc" => 43,
480 "Ru" => 44,
481 "Rh" => 45,
482 "Pd" => 46,
483 "Ag" => 47,
484 "Cd" => 48,
485 "In" => 49,
486 "Sn" => 50,
487 "Sb" => 51,
488 "Te" => 52,
489 "I" => 53,
490 "Xe" => 54,
491 "Cs" => 55,
492 "Ba" => 56,
493 "La" => 57,
494 "Ce" => 58,
495 "Pr" => 59,
496 "Nd" => 60,
497 "Pm" => 61,
498 "Sm" => 62,
499 "Eu" => 63,
500 "Gd" => 64,
501 "Tb" => 65,
502 "Dy" => 66,
503 "Ho" => 67,
504 "Er" => 68,
505 "Tm" => 69,
506 "Yb" => 70,
507 "Lu" => 71,
508 "Hf" => 72,
509 "Ta" => 73,
510 "W" => 74,
511 "Re" => 75,
512 "Os" => 76,
513 "Ir" => 77,
514 "Pt" => 78,
515 "Au" => 79,
516 "Hg" => 80,
517 "Tl" => 81,
518 "Pb" => 82,
519 "Bi" => 83,
520 _ => 0,
521 }
522}
523
524#[cfg(test)]
525mod tests {
526 use super::*;
527
528 #[test]
529 fn test_parse_nacl_cif() {
530 let cif = r#"
531data_NaCl
532_cell_length_a 5.640
533_cell_length_b 5.640
534_cell_length_c 5.640
535_cell_angle_alpha 90.000
536_cell_angle_beta 90.000
537_cell_angle_gamma 90.000
538_symmetry_space_group_name_H-M 'F m -3 m'
539_symmetry_Int_Tables_number 225
540#
541loop_
542_atom_site_label
543_atom_site_type_symbol
544_atom_site_fract_x
545_atom_site_fract_y
546_atom_site_fract_z
547_atom_site_occupancy
548Na1 Na 0.000000 0.000000 0.000000 1.0000
549Cl1 Cl 0.500000 0.500000 0.500000 1.0000
550"#;
551 let structure = parse_cif(cif).unwrap();
552 assert_eq!(structure.data_block, "NaCl");
553 assert!((structure.cell_params.a - 5.640).abs() < 1e-6);
554 assert_eq!(structure.space_group_hm, Some("F m -3 m".to_string()));
555 assert_eq!(structure.space_group_number, Some(225));
556 assert_eq!(structure.atom_sites.len(), 2);
557 assert_eq!(structure.atom_sites[0].type_symbol, "Na");
558 assert_eq!(structure.atom_sites[0].atomic_number, 11);
559 assert_eq!(structure.atom_sites[1].type_symbol, "Cl");
560 assert_eq!(structure.atom_sites[1].atomic_number, 17);
561 assert!((structure.atom_sites[1].frac_x - 0.5).abs() < 1e-6);
562 }
563
564 #[test]
565 fn test_roundtrip_cif() {
566 let original = CifStructure {
567 data_block: "test".to_string(),
568 cell: UnitCell::from_parameters(&CellParameters {
569 a: 10.0,
570 b: 10.0,
571 c: 10.0,
572 alpha: 90.0,
573 beta: 90.0,
574 gamma: 90.0,
575 }),
576 cell_params: CellParameters {
577 a: 10.0,
578 b: 10.0,
579 c: 10.0,
580 alpha: 90.0,
581 beta: 90.0,
582 gamma: 90.0,
583 },
584 space_group_hm: Some("P m -3 m".to_string()),
585 space_group_number: Some(221),
586 atom_sites: vec![CifAtomSite {
587 label: "Fe1".to_string(),
588 type_symbol: "Fe".to_string(),
589 atomic_number: 26,
590 frac_x: 0.0,
591 frac_y: 0.0,
592 frac_z: 0.0,
593 occupancy: 1.0,
594 }],
595 };
596
597 let cif_text = write_cif(&original);
598 let parsed = parse_cif(&cif_text).unwrap();
599 assert_eq!(parsed.data_block, "test");
600 assert!((parsed.cell_params.a - 10.0).abs() < 1e-4);
601 assert_eq!(parsed.atom_sites.len(), 1);
602 assert_eq!(parsed.atom_sites[0].type_symbol, "Fe");
603 assert_eq!(parsed.atom_sites[0].atomic_number, 26);
604 }
605
606 #[test]
607 fn test_parse_cif_float_uncertainty() {
608 assert_eq!(parse_cif_float("5.640(1)"), Some(5.640));
609 assert_eq!(parse_cif_float("90.000"), Some(90.0));
610 assert_eq!(parse_cif_float("?"), None);
611 }
612
613 #[test]
614 fn test_extract_element_from_label() {
615 assert_eq!(extract_element_from_label("Na1"), "Na");
616 assert_eq!(extract_element_from_label("Fe2+"), "Fe");
617 assert_eq!(extract_element_from_label("O3"), "O");
618 assert_eq!(extract_element_from_label("C12"), "C");
619 }
620}