use crate::prelude::*;
use crate::readers::helpers;
lazy_static::lazy_static! {
static ref ELEMENT_BLOCK_RE: Regex = Regex::new(r"^([A-Za-z]{1,3})\s+(\S+)$").unwrap();
static ref SHELL_COUNT_RE: Regex = Regex::new(r"^\d+$").unwrap();
static ref SHELL_DESC_RE: Regex = Regex::new(r"^1\s+(\d+)\s+(\d+)\s+(\d+)\s+(.+)$").unwrap();
static ref ECP_NAME_RE: Regex = Regex::new(r"^(\S+)$").unwrap();
static ref ECP_ELEMENT_RE: Regex = Regex::new(r"^([A-Za-z]{1,3})\s+nelec\s+(\d+)\s*$").unwrap();
static ref ECP_POTENTIAL_RE: Regex = Regex::new(r"^([A-Za-z]{1,3})\s+([A-Za-z]+)\s*$").unwrap();
static ref ECP_END_RE: Regex = Regex::new(r"^END\s+(\S+)$").unwrap();
}
fn parse_electron_lines(
elements: &mut HashMap<String, BseBasisElement>,
basis_lines: &[String],
) -> Result<(), BseError> {
if basis_lines.is_empty() {
return Ok(());
}
let mut start_idx = 0;
for (idx, line) in basis_lines.iter().enumerate() {
if ELEMENT_BLOCK_RE.is_match(line) {
start_idx = idx;
break;
}
if idx == basis_lines.len() - 1 {
return Ok(());
}
}
let basis_lines = &basis_lines[start_idx..];
if basis_lines.is_empty() {
return Ok(());
}
let parsed = helpers::parse_line_regex(&ELEMENT_BLOCK_RE, &basis_lines[0], "Element line")?;
let element_sym = &parsed[0];
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
if basis_lines.len() < 2 {
return Ok(());
}
if !SHELL_COUNT_RE.is_match(&basis_lines[1]) {
return Ok(()); }
let nshells: usize = basis_lines[1].parse().unwrap();
let mut iline = 2;
for _ in 0..nshells {
let parsed = helpers::parse_line_regex(&SHELL_DESC_RE, &basis_lines[iline], "Shell descriptor")?;
let min_am: i32 = parsed[0].parse().unwrap();
let max_am: i32 = parsed[1].parse().unwrap();
let nprim: usize = parsed[2].parse().unwrap();
let ncont_str = &parsed[3];
let ncont_values: Vec<i32> = ncont_str.split_whitespace().map(|s| s.parse().unwrap()).collect();
let shell_am: Vec<i32> = (min_am..=max_am).collect();
let ncont = ncont_values.iter().sum::<i32>() as usize;
iline += 1;
let matrix_data =
helpers::parse_matrix(&basis_lines[iline..iline + nprim], Some(nprim), Some(1 + ncont), None)?;
let exponents: Vec<String> = matrix_data.iter().map(|row| helpers::replace_d(&row[0])).collect();
let coefficients: Vec<Vec<String>> =
(1..=ncont).map(|i| matrix_data.iter().map(|row| helpers::replace_d(&row[i])).collect()).collect();
iline += nprim;
let func_type = lut::function_type_from_am(&shell_am, "gto", "spherical");
let shell = BseElectronShell {
function_type: func_type,
region: "".to_string(),
angular_momentum: shell_am,
exponents,
coefficients,
};
elements
.entry(element_Z.to_string())
.or_default()
.electron_shells
.get_or_insert_with(Default::default)
.push(shell);
}
Ok(())
}
fn parse_ecp_lines(elements: &mut HashMap<String, BseBasisElement>, basis_lines: &[String]) -> Result<(), BseError> {
let mut iline = 0;
while iline < basis_lines.len() {
if ECP_ELEMENT_RE.is_match(&basis_lines[iline]) {
let parsed = helpers::parse_line_regex(&ECP_ELEMENT_RE, &basis_lines[iline], "ECP element")?;
let element_sym = &parsed[0];
let ecp_electrons: i32 = parsed[1].parse().unwrap();
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
elements.entry(element_Z.to_string()).or_default().ecp_electrons = Some(ecp_electrons);
iline += 1;
while iline < basis_lines.len()
&& !ECP_ELEMENT_RE.is_match(&basis_lines[iline])
&& !ECP_END_RE.is_match(&basis_lines[iline])
{
if ECP_POTENTIAL_RE.is_match(&basis_lines[iline]) {
let parsed = helpers::parse_line_regex(&ECP_POTENTIAL_RE, &basis_lines[iline], "ECP potential")?;
let am_char = &parsed[1];
let pot_am = if am_char.to_lowercase() == "ul" {
vec![-1] } else {
lut::amchar_to_int(am_char, true)
.map_or(bse_raise!(ValueError, "Unknown angular momentum: {}", am_char), Ok)?
};
iline += 1;
let mut ecp_data_lines = Vec::new();
while iline < basis_lines.len()
&& !ECP_POTENTIAL_RE.is_match(&basis_lines[iline])
&& !ECP_ELEMENT_RE.is_match(&basis_lines[iline])
&& !ECP_END_RE.is_match(&basis_lines[iline])
{
ecp_data_lines.push(basis_lines[iline].clone());
iline += 1;
}
let ecp_data = helpers::parse_ecp_table(&ecp_data_lines, &["r_exp", "g_exp", "coeff"], None)?;
let ecp_pot = BseEcpPotential {
angular_momentum: pot_am,
coefficients: ecp_data.coeff,
ecp_type: "scalar_ecp".to_string(),
r_exponents: ecp_data.r_exp,
gaussian_exponents: ecp_data.g_exp,
};
elements
.entry(element_Z.to_string())
.or_default()
.ecp_potentials
.get_or_insert_with(Default::default)
.push(ecp_pot);
} else {
iline += 1;
}
}
let ecp_potentials = elements.get(&element_Z.to_string()).and_then(|e| e.ecp_potentials.as_ref());
if let Some(potentials) = ecp_potentials {
let max_am = potentials
.iter()
.filter(|p| p.angular_momentum[0] >= 0)
.map(|p| p.angular_momentum[0])
.max()
.unwrap_or(0);
if let Some(element) = elements.get_mut(&element_Z.to_string()) {
if let Some(potentials) = &mut element.ecp_potentials {
for pot in potentials.iter_mut() {
if pot.angular_momentum[0] == -1 {
pot.angular_momentum = vec![max_am];
}
}
}
}
}
} else if ECP_END_RE.is_match(&basis_lines[iline]) {
break;
} else {
iline += 1;
}
}
Ok(())
}
pub fn read_cp2k(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
let basis_lines =
helpers::prune_lines(&basis_str.lines().map(|s| s.trim().to_string()).collect_vec(), "#", true, true);
let mut basis_dict = BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema { schema_type: "minimal".to_string(), schema_version: "0.1".to_string() },
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
};
if basis_lines.is_empty() {
return Ok(basis_dict);
}
let ecp_start_idx = basis_lines.iter().position(|x| x.ends_with("_ECP"));
let electron_lines = if let Some(idx) = ecp_start_idx { &basis_lines[..idx] } else { &basis_lines };
let ecp_lines = if let Some(idx) = ecp_start_idx {
&basis_lines[idx + 1..]
} else {
&[]
};
let element_blocks = helpers::partition_lines(
electron_lines,
|x| ELEMENT_BLOCK_RE.is_match(x),
0,
None,
None,
None,
3, true,
)?;
for element_lines in element_blocks {
parse_electron_lines(&mut basis_dict.elements, &element_lines)?;
}
if !ecp_lines.is_empty() {
parse_ecp_lines(&mut basis_dict.elements, ecp_lines)?;
}
let function_types = compose::whole_basis_types(&basis_dict.elements);
basis_dict.function_types = function_types;
Ok(basis_dict)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_cp2k() {
let args = BseGetBasisArgsBuilder::default().elements("1, 6-O".to_string()).build().unwrap();
let basis_str = get_formatted_basis("cc-pVTZ", "cp2k", args);
let basis = read_cp2k(&basis_str).unwrap();
println!("{basis:#?}");
}
#[test]
fn test_read_cp2k_ecp() {
let args = BseGetBasisArgsBuilder::default().elements("49-51".to_string()).build().unwrap();
let basis_str = get_formatted_basis("def2-ECP", "cp2k", args);
let basis = read_cp2k(&basis_str).unwrap();
println!("{basis:#?}");
}
}