use crate::prelude::*;
use crate::readers::helpers;
lazy_static::lazy_static! {
static ref ELEMENT_BLOCK_RE: Regex = Regex::new(r"^([a-zA-Z]{1,3}):(.*)$").unwrap();
static ref ECP_BLOCK_RE: Regex = RegexBuilder::new(r"^ncore\s*=\s*(\d+)\s+lmax\s*=\s*(\d+)\s*$")
.case_insensitive(true)
.build()
.unwrap();
}
fn parse_electron_lines(
elements: &mut HashMap<String, BseBasisElement>,
basis_lines: &[String],
) -> Result<(), BseError> {
let parsed = helpers::parse_line_regex(&ELEMENT_BLOCK_RE, &basis_lines[0], "Element line")?;
let element_sym = &parsed[0];
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
let mut iline = 2;
if !basis_lines[iline].is_empty() {
bse_raise!(ValueError, "Expected blank line, found: {}", basis_lines[iline])?;
}
iline += 1;
let nshell: usize = basis_lines[iline]
.trim()
.parse()
.map_or(bse_raise!(ValueError, "Invalid nshell: {}", basis_lines[iline]), Ok)?;
iline += 1;
if nshell == 0 {
return Ok(());
}
let shell_ams: Vec<i32> = basis_lines[iline]
.split_whitespace()
.map(|s| s.parse().map_err(|_| BseError::ValueError(format!("Invalid AM: {}", s))))
.collect::<Result<Vec<_>, _>>()?;
iline += 1;
let shell_ngens: Vec<usize> = basis_lines[iline]
.split_whitespace()
.map(|s| s.parse().map_err(|_| BseError::ValueError(format!("Invalid ngen: {}", s))))
.collect::<Result<Vec<_>, _>>()?;
iline += 1;
let shell_nprims: Vec<usize> = basis_lines[iline]
.split_whitespace()
.map(|s| s.parse().map_err(|_| BseError::ValueError(format!("Invalid nprim: {}", s))))
.collect::<Result<Vec<_>, _>>()?;
iline += 1;
if shell_ams.len() != nshell || shell_ngens.len() != nshell || shell_nprims.len() != nshell {
bse_raise!(
ValueError,
"Inconsistent shell data: expected {} shells, got AM={}, ngen={}, nprim={}",
nshell,
shell_ams.len(),
shell_ngens.len(),
shell_nprims.len()
)?;
}
for shell_idx in 0..nshell {
let shell_am = vec![shell_ams[shell_idx]];
let nprim = shell_nprims[shell_idx];
let ngen = shell_ngens[shell_idx];
let func_type = lut::function_type_from_am(&shell_am, "gto", "spherical");
if iline < basis_lines.len() && basis_lines[iline].is_empty() {
iline += 1;
}
let mut exp_lines = 0;
for i in iline..basis_lines.len() {
if basis_lines[i].is_empty() {
break;
}
exp_lines += 1;
}
let exponents: Vec<String> = basis_lines[iline..iline + exp_lines]
.iter()
.flat_map(|line| line.split_whitespace().map(helpers::replace_d).collect::<Vec<_>>())
.collect();
iline += exp_lines;
if exponents.len() != nprim {
bse_raise!(ValueError, "Expected {} exponents, found {}", nprim, exponents.len())?;
}
if iline < basis_lines.len() && basis_lines[iline].is_empty() {
iline += 1;
}
let mut coeff_matrix: Vec<Vec<String>> = Vec::new();
for _ in 0..nprim {
let mut row: Vec<String> = Vec::new();
while row.len() < ngen && iline < basis_lines.len() {
let nums: Vec<String> = basis_lines[iline].split_whitespace().map(helpers::replace_d).collect();
row.extend(nums);
iline += 1;
}
if row.len() != ngen {
bse_raise!(ValueError, "Expected {} coefficients in row, found {}", ngen, row.len())?;
}
coeff_matrix.push(row);
}
let coefficients = misc::transpose_matrix(&coeff_matrix);
let shell = BseElectronShell {
function_type: func_type,
region: "".to_string(),
angular_momentum: shell_am,
exponents,
coefficients,
};
elements
.entry(element_Z.to_string())
.or_default()
.electron_shells
.get_or_insert_with(Default::default)
.push(shell);
}
Ok(())
}
fn parse_ecp_lines(elements: &mut HashMap<String, BseBasisElement>, basis_lines: &[String]) -> Result<(), BseError> {
let parsed = helpers::parse_line_regex(&ELEMENT_BLOCK_RE, &basis_lines[0], "Element line")?;
let element_sym = &parsed[0];
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
let mut ncore = 0;
let mut lmax = 0;
let mut ncore_line_idx = None;
for (idx, line) in basis_lines.iter().enumerate().skip(1) {
if let Some(caps) = ECP_BLOCK_RE.captures(line) {
ncore = caps.get(1).unwrap().as_str().parse().unwrap();
lmax = caps.get(2).unwrap().as_str().parse().unwrap();
ncore_line_idx = Some(idx);
break;
}
}
let ncore_line_idx = match ncore_line_idx {
Some(idx) => idx,
None => bse_raise!(ValueError, "No ncore/lmax line found in ECP block")?,
};
elements.entry(element_Z.to_string()).or_default().ecp_electrons = Some(ncore);
let ecp_lines = &basis_lines[ncore_line_idx + 1..];
let ecp_lines: Vec<String> = ecp_lines.iter().filter(|x| *x != "*").cloned().collect();
let ecp_potentials = helpers::partition_lines(
&ecp_lines,
|x| x.chars().next().is_some_and(|c| c.is_alphabetic()),
0,
None,
None,
None,
2,
true,
)?;
let mut _found_max = false;
for pot_lines in ecp_potentials {
let first_line = &pot_lines[0];
let parts: Vec<&str> = first_line.split('-').collect();
let pot_am = if parts.len() == 1 {
let am_char = parts[0].trim();
lut::amchar_to_int(am_char, false)
.map_or(bse_raise!(ValueError, "Unknown angular momentum: {}", am_char), Ok)?
} else {
let am_char = parts[0].trim();
lut::amchar_to_int(am_char, false)
.map_or(bse_raise!(ValueError, "Unknown angular momentum: {}", am_char), Ok)?
};
if pot_am[0] == lmax {
_found_max = true;
}
let ecp_data = helpers::parse_ecp_table(&pot_lines[1..], &["coeff", "r_exp", "g_exp"], None)?;
let ecp_pot = BseEcpPotential {
angular_momentum: pot_am,
coefficients: ecp_data.coeff,
ecp_type: "scalar_ecp".to_string(),
r_exponents: ecp_data.r_exp,
gaussian_exponents: ecp_data.g_exp,
};
elements
.entry(element_Z.to_string())
.or_default()
.ecp_potentials
.get_or_insert_with(Default::default)
.push(ecp_pot);
}
Ok(())
}
pub fn read_genbas(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
let basis_lines =
helpers::prune_lines(&basis_str.lines().map(|s| s.trim().to_string()).collect_vec(), "!#", false, true);
let mut basis_dict = BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema { schema_type: "minimal".to_string(), schema_version: "0.1".to_string() },
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
};
if basis_lines.is_empty() {
return Ok(basis_dict);
}
let element_blocks = helpers::partition_lines(
&basis_lines,
|x| {
if ELEMENT_BLOCK_RE.is_match(x) {
return true;
}
if x.starts_with('*') && x.len() > 1 {
let rest = &x[1..];
return ELEMENT_BLOCK_RE.is_match(rest);
}
false
},
0,
Some(1),
None,
None,
1, true,
)?;
for element_lines in element_blocks {
if element_lines.is_empty() {
continue;
}
let mut start_idx = 0;
let mut is_ecp = false;
for (idx, line) in element_lines.iter().enumerate() {
if ELEMENT_BLOCK_RE.is_match(line) {
start_idx = idx;
break;
}
if line.starts_with('*') && line.len() > 1 && ELEMENT_BLOCK_RE.is_match(&line[1..]) {
start_idx = idx;
is_ecp = true;
break;
}
if line == "*" {
is_ecp = true;
start_idx = idx + 1;
}
}
let element_lines: Vec<String> = element_lines[start_idx..].to_vec();
if element_lines.is_empty() {
continue;
}
let element_lines: Vec<String> = if element_lines[0].starts_with('*') && element_lines[0].len() > 1 {
let mut lines = element_lines.clone();
lines[0] = lines[0][1..].to_string();
lines
} else {
element_lines
};
if element_lines.is_empty() {
continue;
}
let has_ecp_info = element_lines.iter().any(|l| ECP_BLOCK_RE.is_match(l));
if has_ecp_info || is_ecp {
parse_ecp_lines(&mut basis_dict.elements, &element_lines)?;
} else {
parse_electron_lines(&mut basis_dict.elements, &element_lines)?;
}
}
let function_types = compose::whole_basis_types(&basis_dict.elements);
basis_dict.function_types = function_types;
Ok(basis_dict)
}
pub fn read_cfour(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
read_genbas(basis_str)
}
pub fn read_aces2(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
read_genbas(basis_str)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_genbas() {
let args = BseGetBasisArgsBuilder::default().elements("H, O".to_string()).build().unwrap();
let basis_str = get_formatted_basis("cc-pVDZ", "cfour", args);
let basis = read_genbas(&basis_str).unwrap();
println!("{basis:#?}");
}
#[test]
fn test_read_genbas_ecp() {
let args = BseGetBasisArgsBuilder::default().elements("49-51".to_string()).build().unwrap();
let basis_str = get_formatted_basis("def2-ECP", "cfour", args);
let basis = read_genbas(&basis_str).unwrap();
println!("{basis:#?}");
}
}