use crate::prelude::*;
use crate::readers::helpers;
lazy_static::lazy_static! {
static ref ORBITAL_RE: Regex = Regex::new(
&format!(r"^O-([A-Za-z]+)(?: ([A-Za-z]+))* \(({})\)\s*$", helpers::BASIS_NAME_RE.as_str())
).unwrap();
static ref SHELL_RE: Regex = Regex::new(r"^\s*(\d+)\s+(\d+)\s+(\d+)\s*$").unwrap();
static ref ECP_START_RE: Regex = Regex::new(r"^\s*ECP\s*$").unwrap();
static ref ECP_ENTRY_RE: Regex = Regex::new(r"^([A-Za-z]+)\s+nelec\s+(\d+)\s*$").unwrap();
static ref ECP_SHELL_RE: Regex = Regex::new(r"^([A-Za-z]+)\s+([A-Za-z]+)\s*$").unwrap();
static ref ECP_DATA_RE: Regex = Regex::new(
&format!(r"^\s*(\d+)\s+({})\s+({})\s*$", helpers::FLOATING_RE.as_str(), helpers::FLOATING_RE.as_str())
).unwrap();
static ref BASIS_END_RE: Regex = Regex::new(r"^\s*END\s*$").unwrap();
}
fn parse_electron_lines(
elements: &mut HashMap<String, BseBasisElement>,
element_lines: &[String],
) -> Result<(), BseError> {
if element_lines.is_empty() {
return Ok(());
}
let parsed = helpers::parse_line_regex(&ORBITAL_RE, &element_lines[0], "O-element (basis_name)")?;
let element_name = &parsed[0];
let element_Z = lut::element_Z_from_name(element_name)
.map_or(bse_raise!(ValueError, "Unknown element name: {}", element_name), Ok)?;
let n_shells: usize = element_lines[1]
.split_whitespace()
.next()
.map_or(bse_raise!(ValueError, "Missing number of shells"), Ok)?
.parse()
.map_or(bse_raise!(ValueError, "Invalid number of shells"), Ok)?;
let shell_blocks = helpers::partition_lines(
&element_lines[2..],
|x| SHELL_RE.is_match(x),
0,
None,
None,
None,
2, true,
)?;
if shell_blocks.len() != n_shells {
eprintln!("Warning: Expected {} shells, found {}", n_shells, shell_blocks.len());
}
for shell_lines in &shell_blocks {
if !SHELL_RE.is_match(&shell_lines[0]) {
continue;
}
let parsed = helpers::parse_line_regex(&SHELL_RE, &shell_lines[0], "formal_qn, am, nprim")?;
let _formal_qn: i32 = parsed[0].parse().map_or(bse_raise!(ValueError, "Invalid formal qn"), Ok)?;
let shell_am: i32 = parsed[1].parse().map_or(bse_raise!(ValueError, "Invalid shell am"), Ok)?;
let nprim: usize = parsed[2].parse().map_or(bse_raise!(ValueError, "Invalid nprim"), Ok)?;
let ngen = 1;
let (exponents, coefficients) =
helpers::parse_primitive_matrix(&shell_lines[1..1 + nprim], Some(nprim), Some(ngen), None)?;
let func_type = lut::function_type_from_am(&[shell_am], "gto", "spherical");
let shell = BseElectronShell {
function_type: func_type,
region: "".to_string(),
angular_momentum: vec![shell_am],
exponents,
coefficients,
};
elements
.entry(element_Z.to_string())
.or_default()
.electron_shells
.get_or_insert_with(Default::default)
.push(shell);
}
Ok(())
}
fn parse_ecp_lines(elements: &mut HashMap<String, BseBasisElement>, ecp_lines: &[String]) -> Result<(), BseError> {
if ecp_lines.is_empty() {
return Ok(());
}
let ecp_lines = if !ecp_lines.is_empty() && ecp_lines[0].trim() == "ECP" { &ecp_lines[1..] } else { ecp_lines };
let element_blocks = helpers::partition_lines(
ecp_lines,
|x| ECP_ENTRY_RE.is_match(x),
0,
None,
None,
None,
2, true,
)?;
for element_lines in &element_blocks {
if !ECP_ENTRY_RE.is_match(&element_lines[0]) {
continue;
}
let parsed = helpers::parse_line_regex(&ECP_ENTRY_RE, &element_lines[0], "symbol nelec N")?;
let element_sym = &parsed[0];
let ecp_electrons: i32 = parsed[1].parse().map_or(bse_raise!(ValueError, "Invalid nelec"), Ok)?;
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
elements.entry(element_Z.to_string()).or_default().ecp_electrons = Some(ecp_electrons);
let mut am_blocks: Vec<(String, Vec<Vec<String>>)> = Vec::new();
let mut current_block: Option<(String, Vec<Vec<String>>)> = None;
for line in &element_lines[1..] {
if ECP_SHELL_RE.is_match(line) {
if let Some(block) = current_block.take() {
am_blocks.push(block);
}
let parsed = helpers::parse_line_regex(&ECP_SHELL_RE, line, "symbol ecp-am")?;
let ecp_am = &parsed[1];
current_block = Some((ecp_am.clone(), Vec::new()));
} else if ECP_DATA_RE.is_match(line) {
if let Some((_, data)) = &mut current_block {
let parsed = helpers::parse_line_regex(&ECP_DATA_RE, line, "rexp gexp gcoeff")?;
data.push(parsed);
}
} else if BASIS_END_RE.is_match(line) {
break;
}
}
if let Some(block) = current_block.take() {
am_blocks.push(block);
}
let n_blocks = am_blocks.len();
for (iblock, (am_str, data)) in am_blocks.into_iter().enumerate() {
let current_am = if iblock == 0 {
(n_blocks - 1) as i32
} else {
(iblock - 1) as i32
};
if iblock > 0 {
let expected_amchar = lut::amint_to_char(&[current_am], HIK).to_lowercase();
if am_str.to_lowercase() != expected_amchar {
eprintln!("Warning: Expected ECP AM {} but found {}", expected_amchar, am_str);
}
}
let r_exp: Vec<i32> = data.iter().map(|x| x[0].parse().unwrap()).collect();
let g_exp: Vec<String> = data.iter().map(|x| x[1].clone()).collect();
let coeff: Vec<String> = data.iter().map(|x| x[2].clone()).collect();
let ecp_pot = BseEcpPotential {
angular_momentum: vec![current_am],
coefficients: vec![coeff],
ecp_type: "scalar_ecp".to_string(),
r_exponents: r_exp,
gaussian_exponents: g_exp,
};
elements
.entry(element_Z.to_string())
.or_default()
.ecp_potentials
.get_or_insert_with(Default::default)
.push(ecp_pot);
}
}
Ok(())
}
pub fn read_demon2k(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
let basis_lines: Vec<String> = basis_str.lines().map(|s| s.trim().to_string()).collect();
let basis_lines = helpers::prune_lines(&basis_lines, "#", true, true);
if basis_lines.is_empty() {
return Ok(BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema {
schema_type: "minimal".to_string(),
schema_version: "0.1".to_string(),
},
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
});
}
let mut basis_dict = BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema { schema_type: "minimal".to_string(), schema_version: "0.1".to_string() },
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
};
let orbital_sections = helpers::partition_lines(
&basis_lines,
|x| ORBITAL_RE.is_match(x),
0,
None,
None,
None,
3, true,
)?;
for section in &orbital_sections {
if !section.is_empty() && ORBITAL_RE.is_match(§ion[0]) {
parse_electron_lines(&mut basis_dict.elements, section)?;
}
}
let ecp_sections = helpers::partition_lines(
&basis_lines,
|x| ECP_START_RE.is_match(x),
0,
None,
None,
None,
2, true,
)?;
for section in &ecp_sections {
if !section.is_empty() && ECP_START_RE.is_match(§ion[0]) {
parse_ecp_lines(&mut basis_dict.elements, section)?;
}
}
let function_types = compose::whole_basis_types(&basis_dict.elements);
basis_dict.function_types = function_types;
Ok(basis_dict)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_demon2k() {
let args = BseGetBasisArgsBuilder::default().elements("H, C-O".to_string()).build().unwrap();
let basis_str = get_formatted_basis("cc-pVDZ", "demon2k", args);
let basis = read_demon2k(&basis_str).unwrap();
println!("{basis:#?}");
}
#[test]
fn test_read_demon2k_ecp() {
let args = BseGetBasisArgsBuilder::default().elements("49-51".to_string()).build().unwrap();
let basis_str = get_formatted_basis("def2-ECP", "demon2k", args);
let basis = read_demon2k(&basis_str).unwrap();
println!("{basis:#?}");
}
}