use crate::prelude::*;
use crate::readers::helpers;
lazy_static::lazy_static! {
static ref SHELL_BEGIN_RE: Regex = Regex::new(r"^([SPDFGHIJKLMNOQRTUVWXYZABCE])\s+(\d+)\s+(\d+)$").unwrap();
}
fn parse_element_lines(
elements: &mut HashMap<String, BseBasisElement>,
element_lines: &[String],
element_sym: &str,
) -> Result<(), BseError> {
let element_Z = lut::element_Z_from_sym(element_sym)
.map_or(bse_raise!(ValueError, "Unknown element symbol: {}", element_sym), Ok)?;
let shell_blocks = helpers::partition_lines(
element_lines,
|x| SHELL_BEGIN_RE.is_match(x),
0,
None,
None,
None,
2, true,
)?;
for shell_lines in shell_blocks {
let parsed = helpers::parse_line_regex(&SHELL_BEGIN_RE, &shell_lines[0], "Shell: amchar, nprim, ncont")?;
let amchar = &parsed[0];
let nprim: usize = parsed[1].parse().map_or(bse_raise!(ValueError, "Invalid nprim: {}", parsed[1]), Ok)?;
let ncont: usize = parsed[2].parse().map_or(bse_raise!(ValueError, "Invalid ncont: {}", parsed[2]), Ok)?;
if ncont != 1 {
eprintln!("Warning: VeloxChem format expects ncont=1 for all shells, found ncont={ncont}");
}
let (exponents, coefficients) =
helpers::parse_primitive_matrix(&shell_lines[1..], Some(nprim), Some(ncont), None)?;
let am = lut::amchar_to_int(amchar, HIJ).unwrap_or_default();
let func_type = lut::function_type_from_am(&am, "gto", "spherical");
let shell = BseElectronShell {
function_type: func_type,
region: "".to_string(),
angular_momentum: am,
exponents,
coefficients,
};
elements
.entry(element_Z.to_string())
.or_default()
.electron_shells
.get_or_insert_with(Default::default)
.push(shell);
}
Ok(())
}
pub fn read_veloxchem(basis_str: &str) -> Result<BseBasisMinimal, BseError> {
let mut basis_lines: Vec<String> = basis_str.lines().map(|s| s.trim().to_string()).collect();
if basis_lines.is_empty() {
return Ok(BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema {
schema_type: "minimal".to_string(),
schema_version: "0.1".to_string(),
},
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
});
}
let expected_md5sum = basis_lines.pop().unwrap_or_default().trim().to_string();
let basis_set_idx = basis_lines.iter().position(|x| x.starts_with("@BASIS_SET"));
let basis_set_idx = match basis_set_idx {
Some(idx) => idx,
None => bse_raise!(ValueError, "No @BASIS_SET line found in VeloxChem format")?,
};
let content_to_check = basis_lines[basis_set_idx..].join("\n") + "\n";
let computed_md5sum = format!("{:x}", md5::compute(content_to_check.as_bytes()));
if computed_md5sum != expected_md5sum {
eprintln!(
"Warning: VeloxChem MD5 checksum mismatch (computed: {}, expected: {})",
computed_md5sum, expected_md5sum
);
}
basis_lines = helpers::prune_lines(&basis_lines, "!#", true, true);
let atombasis_indices: Vec<(usize, String)> = basis_lines
.iter()
.enumerate()
.filter_map(|(i, line)| {
if line.starts_with("@ATOMBASIS") {
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() >= 2 {
Some((i, parts[1].to_string()))
} else {
None
}
} else {
None
}
})
.collect();
let end_indices: Vec<usize> = basis_lines
.iter()
.enumerate()
.filter_map(|(i, line)| if line.starts_with("@END") { Some(i) } else { None })
.collect();
if atombasis_indices.len() != end_indices.len() {
bse_raise!(ValueError, "Mismatched @ATOMBASIS and @END markers")?;
}
let mut basis_dict = BseBasisMinimal {
molssi_bse_schema: BseMolssiBseSchema { schema_type: "minimal".to_string(), schema_version: "0.1".to_string() },
elements: HashMap::new(),
function_types: Vec::new(),
name: "unknown_basis".to_string(),
description: "no_description".to_string(),
};
for ((start_idx, element_sym), end_idx) in atombasis_indices.into_iter().zip(end_indices.into_iter()) {
if end_idx <= start_idx + 1 {
continue;
}
let element_lines: Vec<String> = basis_lines[start_idx + 1..end_idx].to_vec();
parse_element_lines(&mut basis_dict.elements, &element_lines, &element_sym)?;
}
let function_types = compose::whole_basis_types(&basis_dict.elements);
basis_dict.function_types = function_types;
Ok(basis_dict)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_veloxchem() {
let args = BseGetBasisArgsBuilder::default().elements("H, C-O".to_string()).build().unwrap();
let basis_str = get_formatted_basis("cc-pVDZ", "veloxchem", args);
let basis = read_veloxchem(&basis_str).unwrap();
println!("{basis:#?}");
}
}