use crate::prelude::*;
use crate::readers::helpers;
lazy_static::lazy_static! {
static ref ELEMENT_ENTRY_RE: Regex = Regex::new(r"^([a-zA-Z]{1,3}):(.*):(.*)$").unwrap();
static ref SHELL_INFO_RE: Regex = Regex::new(r"^([a-zA-Z])\s+(\d+)\s+(\d+)$").unwrap();
}
pub fn read_gbasis(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
let basis_lines: Vec<String> = basis_str.lines().map(|s| s.trim().to_string()).collect();
let basis_lines = helpers::prune_lines(&basis_lines, "!#", true, true);
if basis_lines.is_empty() {
return Ok(BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema {
schema_type: "minimal".to_string(),
schema_version: "0.1".to_string(),
},
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
});
}
let element_sections = helpers::partition_lines(
&basis_lines,
|x| ELEMENT_ENTRY_RE.is_match(x),
0,
None,
None,
None,
4, true,
)?;
let mut basis_dict = BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema { schema_type: "minimal".to_string(), schema_version: "0.1".to_string() },
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
};
let mut found_basis_names: HashSet<String> = HashSet::new();
for element_lines in &element_sections {
if element_lines.len() < 4 {
continue;
}
let parsed =
helpers::parse_line_regex(&ELEMENT_ENTRY_RE, &element_lines[0], "Element entry: sym:basis:pattern")?;
let element_sym = &parsed[0];
let basis_name = &parsed[1];
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
found_basis_names.insert(basis_name.to_lowercase());
let max_am: i32 = element_lines[1].trim().parse().map_or(bse_raise!(ValueError, "Invalid max_am"), Ok)?;
let shell_blocks = helpers::partition_lines(
&element_lines[2..],
|x| x.chars().next().is_some_and(|c| c.is_ascii_alphabetic()),
0,
None,
None,
None,
2, true,
)?;
if (max_am + 1) as usize != shell_blocks.len() {
eprintln!(
"Warning: Expected {} blocks for element {}, found {}",
max_am + 1,
element_sym,
shell_blocks.len()
);
}
for shell_lines in &shell_blocks {
let parsed = helpers::parse_line_regex(&SHELL_INFO_RE, &shell_lines[0], "Shell: AM, nprim, ngen")?;
let amchar = &parsed[0];
let nprim: usize = parsed[1].parse().map_or(bse_raise!(ValueError, "Invalid nprim"), Ok)?;
let ngen: usize = parsed[2].parse().map_or(bse_raise!(ValueError, "Invalid ngen"), Ok)?;
let shell_am = lut::amchar_to_int(amchar, HIK).unwrap_or_default();
if shell_am.len() > 1 {
bse_raise!(ValueError, "Fused AM not supported by gbasis reader")?;
}
let func_type = lut::function_type_from_am(&shell_am, "gto", "spherical");
let (exponents, coefficients) =
helpers::parse_primitive_matrix(&shell_lines[1..], Some(nprim), Some(ngen), None)?;
let shell = BseElectronShell {
function_type: func_type,
region: "".to_string(),
angular_momentum: shell_am,
exponents,
coefficients,
};
basis_dict
.elements
.entry(element_Z.to_string())
.or_default()
.electron_shells
.get_or_insert_with(Default::default)
.push(shell);
}
}
if found_basis_names.len() > 1 {
eprintln!("Warning: Multiple basis sets found in file: {}", found_basis_names.iter().join(", "));
}
let function_types = compose::whole_basis_types(&basis_dict.elements);
basis_dict.function_types = function_types;
if let Some(name) = found_basis_names.iter().next() {
basis_dict.name = name.clone();
}
Ok(basis_dict)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_gbasis_sample() {
let sample = r"H:sto-3g:(3s) -> [1s]
0
S 3 1
0.4231582280E+01 0.1000000000E+01
0.2303687730E+01 0.5000000000E+00
0.1612775880E+01 0.1666666667E+00";
let basis = read_gbasis(sample).unwrap();
println!("{basis:#?}");
}
}