use crate::prelude::*;
use crate::readers::helpers;
lazy_static::lazy_static! {
static ref ELEMENT_RE: Regex = Regex::new(r"^-?([A-Za-z]{1,3})(?:\s+0)?$").unwrap();
static ref ECP_SHELL_START_RE: Regex = Regex::new(r"^([A-Za-z]{1,3})\s+(\d+)\s+(\d+)$").unwrap();
static ref ECP_AM_NELEC_RE: Regex = Regex::new(r"^\S+\s+(\d+)\s+(\d+)$").unwrap();
static ref AM_LINE_RE: Regex =
Regex::new(&format!(r"^([A-Za-z]+)\s+(\d+)((?:\s+{})+)$", helpers::FLOATING_RE.as_str())).unwrap();
static ref EXPLICIT_AM_LINE_RE: Regex =
Regex::new(&format!(r"^\s*L=(\d+)\s+(\d+)((?:\s+{})+)$", helpers::FLOATING_RE.as_str())).unwrap();
}
fn parse_electron_lines(
elements: &mut HashMap<String, BseBasisElement>,
basis_lines: &[String],
) -> Result<(), BseError> {
let last = basis_lines.last().map_or(bse_raise!(ValueError, "Empty basis lines"), Ok)?;
if last != "****" {
bse_raise!(ValueError, "Electron shell is missing terminating ****")?
}
let element_sym = basis_lines[0]
.split_whitespace()
.next()
.map_or(bse_raise!(ValueError, "Invalid element line: {}", basis_lines[0]), Ok)?;
let element_sym = element_sym.trim_start_matches('-');
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
let shell_blocks = helpers::partition_lines(
&basis_lines[1..basis_lines.len() - 1], |x| x.chars().next().is_some_and(|c| c.is_alphabetic()),
0,
None,
None,
None,
1,
true,
)?;
for sh_lines in shell_blocks {
let (shell_am, nprim, scaling_factors) = if AM_LINE_RE.is_match(&sh_lines[0]) {
let parsed = helpers::parse_line_regex(&AM_LINE_RE, &sh_lines[0], "Shell AM, nprim, scaling")?;
let shell_am = lut::amchar_to_int(&parsed[0], true)
.map_or(bse_raise!(ValueError, "Unknown angular momentum: {}", parsed[0]), Ok)?;
(shell_am, parsed[1].clone(), parsed[2].clone())
} else if EXPLICIT_AM_LINE_RE.is_match(&sh_lines[0]) {
let parsed = helpers::parse_line_regex(&EXPLICIT_AM_LINE_RE, &sh_lines[0], "Shell AM, nprim, scaling")?;
let shell_am = vec![parsed[0]
.parse()
.map_or(bse_raise!(ValueError, "Invalid angular momentum: {}", parsed[0]), Ok)?];
(shell_am, parsed[1].clone(), parsed[2].clone())
} else {
return bse_raise!(ValueError, "Failed to parse shell block starting on line: {}", sh_lines[0]);
};
let func_type = lut::function_type_from_am(&shell_am, "gto", "spherical");
let scaling_factors = helpers::replace_d(&scaling_factors);
let mut scaling_factors: Vec<f64> = scaling_factors
.split_whitespace()
.map(|s| s.parse().map_err(|_| BseError::ValueError(format!("Invalid scaling factor: {s}"))))
.collect::<Result<_, _>>()?;
scaling_factors.retain(|&x| x != 0.0);
if scaling_factors.is_empty() {
bse_raise!(ValueError, "No scaling factors given for element {element_sym}: Line: {}", sh_lines[0])?;
}
if scaling_factors.len() > 1 {
bse_raise!(NotImplementedError, "Number of scaling factors > 1")?;
}
let scaling_factor = scaling_factors[0].powi(2);
let has_scaling = (scaling_factor - 1.0).abs() > f64::EPSILON;
let ngen = shell_am.len();
let nprim = nprim.parse().map_or(bse_raise!(ValueError, "Invalid nprim value: {nprim}"), Ok)?;
let (mut exponents, coefficients) =
helpers::parse_primitive_matrix(&sh_lines[1..], Some(nprim), Some(ngen), None)?;
if has_scaling {
exponents = exponents
.into_iter()
.map(|ex| {
let ex_val: f64 = ex.parse().map_or(bse_raise!(ValueError, "Invalid exponent: {ex}"), Ok)?;
let scaled = ex_val * scaling_factor;
let formatted = format!("{scaled:.16E}");
let mut parts = formatted.split('E');
let mantissa = parts.next().unwrap();
let exponent = parts.next().unwrap();
let trimmed_mantissa = mantissa.trim_end_matches('0');
let mantissa = if trimmed_mantissa.ends_with('.') {
format!("{trimmed_mantissa}0")
} else {
trimmed_mantissa.to_string()
};
Ok(format!("{mantissa}E{exponent}"))
})
.collect::<Result<Vec<String>, BseError>>()?;
}
let shell = BseElectronShell {
function_type: func_type,
region: "".to_string(),
angular_momentum: shell_am,
exponents,
coefficients,
};
elements
.entry(element_Z.to_string())
.or_default()
.electron_shells
.get_or_insert_with(Default::default)
.push(shell);
}
Ok(())
}
fn parse_ecp_lines(elements: &mut HashMap<String, BseBasisElement>, basis_lines: &[String]) -> Result<(), BseError> {
let element_sym = basis_lines[0]
.split_whitespace()
.next()
.map_or(bse_raise!(ValueError, "Invalid element line: {}", basis_lines[0]), Ok)?;
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
let parsed = helpers::parse_line_regex(&ECP_AM_NELEC_RE, &basis_lines[1], "ECP max_am, nelec")?;
let max_am = parsed[0].parse().map_or(bse_raise!(ValueError, "Invalid max_am value: {}", parsed[0]), Ok)?;
let ecp_electrons = parsed[1].parse().map_or(bse_raise!(ValueError, "Invalid nelec value: {}", parsed[1]), Ok)?;
let ecp_blocks = helpers::partition_lines(&basis_lines[2..], helpers::is_integer, 1, None, None, None, 1, true)?;
for pot_lines in ecp_blocks {
let nlines: i32 = pot_lines[1]
.parse()
.map_or(bse_raise!(ValueError, "Number of lines for potential is not an integer: {}", pot_lines[1]), Ok)?;
if nlines <= 0 {
bse_raise!(ValueError, "Number of lines for potential is <= 0")?;
}
if pot_lines.len() as i32 != (nlines + 2) {
bse_raise!(ValueError, "Number of lines is incorrect. Expected {nlines}, got {}", pot_lines.len() - 2)?;
}
let ecp_data = helpers::parse_ecp_table(&pot_lines[2..], &["r_exp", "g_exp", "coeff"], None)?;
let ecp_pot = BseEcpPotential {
angular_momentum: vec![],
coefficients: ecp_data.coeff,
ecp_type: "scalar_ecp".to_string(),
r_exponents: ecp_data.r_exp,
gaussian_exponents: ecp_data.g_exp,
};
elements
.entry(element_Z.to_string())
.or_default()
.ecp_potentials
.get_or_insert_with(Default::default)
.push(ecp_pot);
}
let all_pot_am = helpers::potential_am_list(max_am);
let element_data = elements.get_mut(&element_Z.to_string()).unwrap();
let ecp_potentials = element_data.ecp_potentials.as_mut().unwrap();
if all_pot_am.len() != ecp_potentials.len() {
bse_raise!(
ValueError,
"Found incorrect number of potentials for {element_sym}: Expected {}, got {}",
all_pot_am.len(),
ecp_potentials.len()
)?;
}
for (idx, pot) in ecp_potentials.iter_mut().enumerate() {
pot.angular_momentum = vec![all_pot_am[idx]];
}
elements.entry(element_Z.to_string()).or_default().ecp_electrons = Some(ecp_electrons);
Ok(())
}
pub fn read_g94(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
let basis_lines =
helpers::prune_lines(&basis_str.lines().map(|s| s.trim().to_string()).collect_vec(), "!", true, true);
if basis_lines.is_empty() {
return Ok(BseBasisMinimal::default());
}
let mut basis_dict = BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema { schema_type: "minimal".to_string(), schema_version: "0.1".to_string() },
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
};
let element_sections =
helpers::partition_lines(&basis_lines, |x| ELEMENT_RE.is_match(x), 0, None, None, None, 3, true)?;
for es in element_sections {
if es.len() > 3 && helpers::is_integer(&es[3]) {
parse_ecp_lines(&mut basis_dict.elements, &es)?;
} else {
parse_electron_lines(&mut basis_dict.elements, &es)?;
}
}
let function_types = compose::whole_basis_types(&basis_dict.elements);
basis_dict.function_types = function_types;
Ok(basis_dict)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_g94() {
let args = BseGetBasisArgsBuilder::default().elements("H, O".to_string()).build().unwrap();
let basis_str = get_formatted_basis("cc-pVDZ", "gaussian94", args);
let basis = read_g94(&basis_str).unwrap();
println!("{basis:#?}");
}
}