use crate::prelude::*;
lazy_static::lazy_static! {
/// Unanchored pattern for a floating-point literal: optional sign, optional digits, a mandatory
/// `.`, optional digits, and an optional exponent introduced by `d`, `D`, `e` or `E`.
pub static ref FLOATING_RE: Regex = Regex::new(r"[-+]?\d*\.\d*(?:[dDeE][-+]?\d+)?").unwrap();
/// Same pattern as `FLOATING_RE`, anchored with `^`/`$` so the entire string must match.
pub static ref FLOATING_ONLY_RE: Regex = Regex::new(r"^[-+]?\d*\.\d*(?:[dDeE][-+]?\d+)?$").unwrap();
/// Unanchored pattern for an integer literal: optional sign followed by one or more digits.
pub static ref INTEGER_RE: Regex = Regex::new(r"[-+]?\d+").unwrap();
/// Same pattern as `INTEGER_RE`, anchored with `^`/`$` so the entire string must match.
pub static ref INTEGER_ONLY_RE: Regex = Regex::new(r"^[-+]?\d+$").unwrap();
/// Unanchored pattern: optional leading digits, one ASCII letter, then any run of ASCII letters,
/// digits or the characters `- + * ( ) [ ]`. Judging by its name it is meant for basis set names.
pub static ref BASIS_NAME_RE: Regex = Regex::new(r"\d*[a-zA-Z][a-zA-Z0-9\-\+\*\(\)\[\]]*").unwrap();
/// One or more whitespace characters; the default field separator of the readers in this file.
pub static ref SPACES_RE: Regex = Regex::new(r"\s+").unwrap();
}
/// Reports whether `s` parses as an `f64` after `replace_d` has rewritten any `d`/`D`
/// characters to `e`/`E`.
#[inline]
pub(crate) fn is_floating(s: &str) -> bool {
    let normalized = replace_d(s);
    <f64 as std::str::FromStr>::from_str(&normalized).is_ok()
}
/// Reports whether `s` is a base-10 integer that fits in an `i32`.
///
/// An optional leading `+` or `-` is accepted; surrounding whitespace is not.
#[inline]
pub(crate) fn is_integer(s: &str) -> bool {
    i32::from_str_radix(s, 10).is_ok()
}
/// Returns a copy of `s` in which every `d` is replaced by `e` and every `D` by `E`.
///
/// All other characters are copied through unchanged.
#[inline]
pub(crate) fn replace_d(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            'd' => 'e',
            'D' => 'E',
            other => other,
        })
        .collect()
}
/// Returns `max_am` followed by `0, 1, …, max_am - 1`.
///
/// For `max_am <= 0` the range is empty, so the result is just `[max_am]`.
#[inline]
#[allow(unused)]
pub(crate) fn potential_am_list(max_am: i32) -> Vec<i32> {
    let mut candidates = vec![max_am];
    candidates.extend(0..max_am);
    candidates
}
/// Splits `lst` into `rows` consecutive chunks of `cols` elements each.
///
/// # Errors
///
/// Returns a `ValueError` when `lst.len()` is not exactly `rows * cols`, when that product
/// overflows `usize`, or when `cols` is zero (a zero chunk size cannot partition anything and
/// would make `slice::chunks` panic).
#[allow(unused)]
pub(crate) fn chunk_list<T: Clone>(lst: &[T], rows: usize, cols: usize) -> Result<Vec<Vec<T>>, BseError> {
    let n_elements = lst.len();
    // `checked_mul` turns an overflowing `rows * cols` into a mismatch instead of a panic/wrap-around.
    if cols == 0 || rows.checked_mul(cols) != Some(n_elements) {
        bse_raise!(ValueError, "Cannot partition {n_elements} elements into a {rows}x{cols} matrix")?
    }
    let mat: Vec<Vec<T>> = lst.chunks(cols).map(<[T]>::to_vec).collect();
    debug_assert_eq!(mat.len(), rows);
    Ok(mat)
}
/// Verifies that the line at `position` equals `expected` and returns a copy of `lines` without it.
///
/// A negative `position` is counted from the end of `lines` (`-1` addresses the last line).
///
/// # Errors
///
/// Returns a `ValueError` when `lines` is empty, when `position` lies outside `lines`, or when
/// the addressed line differs from `expected`.
#[allow(unused)]
pub(crate) fn remove_expected_line(lines: &[String], expected: &str, position: isize) -> Result<Vec<String>, BseError> {
    let n_lines = lines.len();
    if n_lines == 0 {
        bse_raise!(ValueError, "No lines to test for expected line")?
    }
    // Negative positions may reach back at most `n_lines`; non-negative ones must be < `n_lines`.
    let out_of_range = if position < 0 { n_lines < (-position) as usize } else { n_lines <= position as usize };
    if out_of_range {
        bse_raise!(ValueError, "Not enough lines. Can't test line {position} when there are {} lines", lines.len())?
    }
    // Translate the (possibly negative) position into a plain index.
    let pos = if position < 0 { (n_lines as isize + position) as usize } else { position as usize };
    if lines[pos].as_str() != expected {
        bse_raise!(ValueError, "Expected line '{expected}' at position {pos}, but found '{}'", lines[pos])?
    }
    let kept: Vec<String> =
        lines.iter().enumerate().filter(|&(idx, _)| idx != pos).map(|(_, line)| line.clone()).collect();
    Ok(kept)
}
/// Matches `line` against `rex` and returns the text of every capture group except group 0.
///
/// Groups that did not participate in the match are returned as empty strings.
///
/// # Errors
///
/// Returns a `ValueError` when `rex` does not match `line`. A non-empty `description` is used to
/// identify the pattern in the message; otherwise the pattern itself is shown.
pub fn parse_line_regex(rex: &Regex, line: &str, description: &str) -> Result<Vec<String>, BseError> {
    // `map_or_else` builds the error lazily: the message is only formatted when the match fails.
    let captures = rex.captures(line).map_or_else(
        || match description {
            "" => bse_raise!(ValueError, "Regex '{rex}' does not match line: '{line}'"),
            _ => bse_raise!(ValueError, "Regex '{description}' does not match line: '{line}'. Regex is '{rex}'"),
        },
        Ok,
    )?;
    let result: Vec<String> = captures
        .iter()
        .skip(1) // group 0 is the whole match
        .map(|group| group.map_or_else(String::new, |m| m.as_str().to_string()))
        .collect();
    Ok(result)
}
pub fn parse_line_regex_dict(rex: &Regex, line: &str, description: &str) -> Result<HashMap<String, String>, BseError> {
let captures = rex.captures(line).map_or(
match description {
"" => bse_raise!(ValueError, "Regex '{rex}' does not match line: '{line}'"),
_ => bse_raise!(ValueError, "Regex '{description}' does not match line: '{line}'. Regex is '{rex}'"),
},
Ok,
)?;
let mut result = HashMap::new();
for name in rex.capture_names().flatten() {
if let Some(matches) = captures.name(name) {
result.insert(name.to_string(), matches.as_str().to_string());
}
}
Ok(result)
}
/// Splits `lines` into blocks, starting a new block at every line for which `condition` is true.
///
/// Lines preceding the first matching line form a block of their own (if there are any).
///
/// * `before` – when non-zero, the last `before` lines of each block are moved to the front of
///   the following block and the first block (which must then be empty) is dropped. The first
///   block must hold exactly `before` lines and at least two blocks must exist.
/// * `min_after` – when set, that many lines following a matching line are added to the new
///   block without being tested against `condition`.
/// * `min_blocks` / `max_blocks` – optional bounds on the number of resulting blocks.
/// * `min_size` – minimum number of lines each resulting block must have (`0` disables the check).
/// * `include_match` – whether the matching line itself is kept as the first line of its block.
///
/// # Errors
///
/// Returns a `ValueError` when the `before`, `min_size`, `min_blocks` or `max_blocks`
/// requirements are not met.
pub fn partition_lines(
    lines: &[String],
    condition: impl Fn(&str) -> bool,
    before: usize,
    min_after: Option<usize>,
    min_blocks: Option<usize>,
    max_blocks: Option<usize>,
    min_size: usize,
    include_match: bool,
) -> Result<Vec<Vec<String>>, BseError> {
    let mut all_blocks: Vec<Vec<String>> = Vec::new();
    let mut pending: Vec<String> = Vec::new();

    // `cursor` always points at the next unread line.
    let mut cursor = 0;
    while let Some(line) = lines.get(cursor) {
        cursor += 1;
        if !condition(line) {
            pending.push(line.clone());
            continue;
        }
        // A matching line closes the block collected so far (if any) and opens a new one.
        if !pending.is_empty() {
            all_blocks.push(std::mem::take(&mut pending));
        }
        if include_match {
            pending.push(line.clone());
        }
        if let Some(after) = min_after {
            // Take the following `after` lines verbatim and skip over them.
            let stop = lines.len().min(cursor + after);
            pending.extend_from_slice(&lines[cursor..stop]);
            cursor += after;
        }
    }
    if !pending.is_empty() {
        all_blocks.push(pending);
    }

    if before > 0 {
        if all_blocks.len() <= 1 {
            bse_raise!(ValueError, "Cannot partition lines with before = {}: have {} blocks", before, all_blocks.len())?
        }
        if all_blocks[0].len() != before {
            bse_raise!(
                ValueError,
                "Cannot partition lines with before = {}: first block has {} lines",
                before,
                all_blocks[0].len()
            )?
        }
        // Shift the trailing `before` lines of every block to the front of its successor.
        for idx in 1..all_blocks.len() {
            let (earlier, later) = all_blocks.split_at_mut(idx);
            let previous = &mut earlier[idx - 1];
            let current = &mut later[0];
            let mut carried = previous.split_off(previous.len() - before);
            carried.append(current);
            *current = carried;
        }
        // The first block has given away all of its lines by now.
        let first_block = all_blocks.remove(0);
        debug_assert!(first_block.is_empty());
    }

    if min_size > 0 {
        if let Some(idx) = all_blocks.iter().position(|block| block.len() < min_size) {
            bse_raise!(ValueError, "Block {idx} does not have minimum number of lines ({min_size})")?
        }
    }
    if let Some(min_blocks) = min_blocks.filter(|&wanted| all_blocks.len() < wanted) {
        bse_raise!(ValueError, "Found {} blocks, but need at least {}", all_blocks.len(), min_blocks)?
    }
    if let Some(max_blocks) = max_blocks.filter(|&allowed| all_blocks.len() > allowed) {
        bse_raise!(ValueError, "Found {} blocks, but need at most {}", all_blocks.len(), max_blocks)?
    }
    Ok(all_blocks)
}
/// Reads exactly `n_numbers` floating-point fields from the beginning of `lines`.
///
/// Whole lines are consumed until at least `n_numbers` fields have been collected. Each line has
/// `d`/`D` rewritten to `e`/`E`, is trimmed, and is split with `split_re` (whitespace when `None`).
///
/// Returns the fields (as strings) together with a copy of the lines that were not consumed.
///
/// # Errors
///
/// Returns a `ValueError` when the lines run out, when a blank line is reached before enough
/// fields were found, when the consumed lines hold more than `n_numbers` fields, or when a field
/// is not a floating-point value.
pub fn read_n_floats(
    lines: &[String],
    n_numbers: usize,
    split_re: Option<&Regex>,
) -> Result<(Vec<String>, Vec<String>), BseError> {
    let split_re = split_re.unwrap_or(&SPACES_RE);
    let mut found_numbers: Vec<String> = Vec::new();
    // Number of leading lines consumed so far. Tracking an index avoids cloning every line up
    // front and repeatedly shifting a vector with `remove(0)`.
    let mut consumed = 0;
    while found_numbers.len() < n_numbers {
        if consumed == lines.len() {
            bse_raise!(ValueError, "Wanted {} numbers but ran out of lines after {}", n_numbers, found_numbers.len())?
        }
        let first_line = &lines[consumed];
        if first_line.trim().is_empty() {
            bse_raise!(ValueError, "Wanted {} numbers but found empty line after {}", n_numbers, found_numbers.len())?
        }
        let line = replace_d(first_line);
        found_numbers.extend(split_re.split(line.trim()).filter(|s| !s.is_empty()).map(String::from));
        consumed += 1;
    }
    if found_numbers.len() > n_numbers {
        bse_raise!(ValueError, "Wanted {n_numbers} numbers, but found extra numbers: {found_numbers:?}")?
    }
    if !found_numbers.iter().all(|x| is_floating(x)) {
        bse_raise!(ValueError, "Non-floating-point value found in numbers: {found_numbers:?}")?
    }
    // Only the unread tail needs to be copied for the caller.
    Ok((found_numbers, lines[consumed..].to_vec()))
}
/// Collects every field found on `lines` and checks that all of them are floating-point values.
///
/// Each line has `d`/`D` rewritten to `e`/`E`, is trimmed, and is split with `split_re`
/// (whitespace when `None`); empty fields are dropped. The fields are returned as strings.
///
/// # Errors
///
/// Returns a `ValueError` when any field is not a floating-point value.
pub fn read_all_floats(lines: &[String], split_re: Option<&Regex>) -> Result<Vec<String>, BseError> {
    let split_re = split_re.unwrap_or(&SPACES_RE);
    let mut found_numbers: Vec<String> = Vec::new();
    for line in lines {
        let processed_line = replace_d(line);
        found_numbers.extend(split_re.split(processed_line.trim()).filter(|s| !s.is_empty()).map(String::from));
    }
    if found_numbers.iter().any(|s| !is_floating(s)) {
        bse_raise!(ValueError, "Non-floating-point value found in numbers: {:?}", found_numbers)?
    }
    Ok(found_numbers)
}
/// Reads exactly `n_ints` integer fields from the beginning of `lines`.
///
/// Whole lines are consumed until at least `n_ints` fields have been collected. Each line is
/// trimmed and split with `split_re` (whitespace when `None`); empty fields are dropped, so a
/// blank line is consumed without contributing anything.
///
/// Returns the fields (as strings) together with a copy of the lines that were not consumed.
///
/// # Errors
///
/// Returns a `ValueError` when the lines run out, when the consumed lines hold more than
/// `n_ints` fields, or when a field is not an integer.
pub fn read_n_integers(
    lines: &[String],
    n_ints: usize,
    split_re: Option<&Regex>,
) -> Result<(Vec<String>, Vec<String>), BseError> {
    let split_re = split_re.unwrap_or(&SPACES_RE);
    let mut found_numbers: Vec<String> = Vec::new();
    // Number of leading lines consumed so far. Tracking an index avoids cloning every line up
    // front and repeatedly shifting a vector with `remove(0)`.
    let mut consumed = 0;
    while found_numbers.len() < n_ints {
        if consumed == lines.len() {
            bse_raise!(ValueError, "Wanted {n_ints} integers but ran out of lines after {}", found_numbers.len())?
        }
        let line = lines[consumed].trim();
        found_numbers.extend(split_re.split(line).filter(|s| !s.is_empty()).map(String::from));
        consumed += 1;
    }
    if found_numbers.len() > n_ints {
        bse_raise!(ValueError, "Wanted {n_ints} integers, but found extra numbers: {found_numbers:?}")?
    }
    if !found_numbers.iter().all(|x| is_integer(x)) {
        bse_raise!(ValueError, "Non-integer value found in numbers: {found_numbers:?}")?
    }
    // Only the unread tail needs to be copied for the caller.
    Ok((found_numbers, lines[consumed..].to_vec()))
}
/// Reads a `rows` × `cols` matrix of floating-point fields from the beginning of `lines`.
///
/// Every row is obtained with `read_n_floats`, so a row may span several lines but always ends
/// on a line boundary. Returns the matrix (fields as strings) and the lines left over.
///
/// # Errors
///
/// Propagates the first error reported by `read_n_floats`.
pub fn parse_fixed_matrix(
    lines: &[String],
    rows: usize,
    cols: usize,
    split_re: Option<&Regex>,
) -> Result<(Vec<Vec<String>>, Vec<String>), BseError> {
    let split_re = split_re.unwrap_or(&SPACES_RE);
    let mut remaining_lines = lines.to_vec();
    // Each step reads one row and remembers what is left for the next one; collecting into a
    // `Result` stops at the first failing row.
    let matrix = (0..rows)
        .map(|_| -> Result<Vec<String>, BseError> {
            let (row_data, rest) = read_n_floats(&remaining_lines, cols, Some(split_re))?;
            remaining_lines = rest;
            Ok(row_data)
        })
        .collect::<Result<Vec<_>, BseError>>()?;
    Ok((matrix, remaining_lines))
}
/// Parses `lines` as a rectangular matrix of floating-point fields, one row per non-blank line.
///
/// Each line has `d`/`D` rewritten to `e`/`E`, is trimmed, and is split with `split_re`
/// (whitespace when `None`). Lines that yield no fields are skipped. When `rows` and/or `cols`
/// are given, the matrix dimensions must match them. Fields are returned as strings.
///
/// # Errors
///
/// Returns a `ValueError` when a field is not a floating-point value, when no rows were found,
/// when rows differ in length, or when the dimensions differ from `rows`/`cols`.
pub fn parse_matrix(
    lines: &[String],
    rows: Option<usize>,
    cols: Option<usize>,
    split_re: Option<&Regex>,
) -> Result<Vec<Vec<String>>, BseError> {
    let split_re = split_re.unwrap_or(&SPACES_RE);
    let mut mat: Vec<Vec<String>> = Vec::new();
    for line in lines {
        let processed_line = replace_d(line);
        let row: Vec<String> =
            split_re.split(processed_line.trim()).filter(|s| !s.is_empty()).map(String::from).collect();
        if row.iter().any(|x| !is_floating(x)) {
            bse_raise!(ValueError, "Non-floating-point value found in matrix: {row:?}")?
        }
        if row.is_empty() {
            continue;
        }
        mat.push(row);
    }

    if mat.is_empty() {
        bse_raise!(ValueError, "Empty matrix")?
    }
    let first_row_len = mat[0].len();
    // Defensive only: empty rows are never pushed above, so this cannot currently trigger.
    if first_row_len == 0 {
        bse_raise!(ValueError, "Matrix row has zero values")?
    }
    // Report the first row whose width differs from the first row's.
    if let Some(row) = mat.iter().find(|row| row.len() != first_row_len) {
        bse_raise!(ValueError, "Inconsistent number of columns: {} vs {}", row.len(), first_row_len)?
    }

    if let Some(expected_rows) = rows.filter(|&wanted| wanted != mat.len()) {
        bse_raise!(ValueError, "Inconsistent number of rows: {expected_rows} vs {}", mat.len())?
    }
    if let Some(expected_cols) = cols.filter(|&wanted| wanted != mat[0].len()) {
        bse_raise!(ValueError, "Inconsistent number of columns: {expected_cols} vs {}", mat[0].len())?
    }
    Ok(mat)
}
/// Parses lines of the form `exponent coefficient coefficient …`.
///
/// Each line has `d`/`D` rewritten to `e`/`E`, is trimmed, and is split with `split_re`
/// (whitespace when `None`); lines without fields are skipped. The first field of a line is the
/// exponent, the remaining fields are that line's coefficients.
///
/// Returns the exponents and the coefficient table after it has been passed through
/// `misc::transpose_matrix` (so that it is indexed by coefficient column first). When given,
/// `nprim` must equal the number of exponents and `ngen` the number of coefficient columns.
///
/// # Errors
///
/// Returns a `ValueError` when a field is not a floating-point value, when a line has no
/// coefficients, when lines differ in their number of coefficients, when nothing was parsed, or
/// when the result disagrees with `nprim`/`ngen`.
pub fn parse_primitive_matrix(
    lines: &[String],
    nprim: Option<usize>,
    ngen: Option<usize>,
    split_re: Option<&Regex>,
) -> Result<(Vec<String>, Vec<Vec<String>>), BseError> {
    let split_re = split_re.unwrap_or(&SPACES_RE);
    let mut exponents: Vec<String> = Vec::new();
    let mut coefficients: Vec<Vec<String>> = Vec::new();

    for line in lines {
        let processed_line = replace_d(line);
        let mut fields = split_re.split(processed_line.trim()).filter(|s| !s.is_empty()).map(String::from);
        // First field is the exponent; a line without any field is skipped.
        let e = match fields.next() {
            Some(first) => first,
            None => continue,
        };
        let c: Vec<String> = fields.collect();
        if !is_floating(&e) {
            bse_raise!(ValueError, "Non-floating-point value found in exponents: {e}")?
        }
        if c.iter().any(|x| !is_floating(x)) {
            bse_raise!(ValueError, "Non-floating-point value found in coefficients: {c:?}")?
        }
        exponents.push(e);
        coefficients.push(c);
    }

    if !exponents.is_empty() && coefficients.is_empty() {
        bse_raise!(ValueError, "Missing contraction coefficients")?
    }
    // Every row must carry at least one coefficient and as many as the first row.
    let first_coeff_len = coefficients.first().map(Vec::len).unwrap_or(0);
    for (row, c) in coefficients.iter().enumerate() {
        if c.is_empty() {
            bse_raise!(ValueError, "Missing contraction coefficients in row {}", row + 1)?
        }
        if c.len() != first_coeff_len {
            bse_raise!(ValueError, "Inconsistent number of coefficients: {} vs {}", c.len(), first_coeff_len)?
        }
    }

    let coefficients = misc::transpose_matrix(&coefficients);
    if exponents.is_empty() {
        bse_raise!(ValueError, "No exponents found")?
    }
    if coefficients.is_empty() {
        bse_raise!(ValueError, "No coefficients found")?
    }

    if let Some(expected_nprim) = nprim {
        let n_exponents = exponents.len();
        if n_exponents != expected_nprim {
            bse_raise!(
                ValueError,
                "Inconsistent number of primitives in exponents: {expected_nprim} vs {}",
                exponents.len()
            )?
        }
        let n_per_contraction = coefficients[0].len();
        if n_per_contraction != expected_nprim {
            bse_raise!(
                ValueError,
                "Inconsistent number of primitives in coefficients: {expected_nprim} vs {}",
                coefficients[0].len()
            )?
        }
    }
    if let Some(expected_ngen) = ngen.filter(|&wanted| wanted != coefficients.len()) {
        bse_raise!(
            ValueError,
            "Inconsistent number of general contractions: {expected_ngen} vs {}",
            coefficients.len()
        )?
    }
    Ok((exponents, coefficients))
}
/// Column-wise contents of an ECP table, as produced by `parse_ecp_table`.
pub struct ReaderECP {
/// Values of the `r_exp` column, parsed as `i32`.
pub r_exp: Vec<i32>,
/// Values of the `g_exp` column, kept as strings that passed the floating-point check.
pub g_exp: Vec<String>,
/// Values of the `coeff` column as strings; `parse_ecp_table` stores them in a single inner vector.
pub coeff: Vec<Vec<String>>,
}
/// Parses a three-column ECP table.
///
/// `order` names the columns from left to right and must be a permutation of `"r_exp"`,
/// `"g_exp"` and `"coeff"`. Each line has `d`/`D` rewritten to `e`/`E`, is trimmed, and is split
/// with `split_re` (whitespace when `None`); it must yield exactly three fields.
///
/// # Errors
///
/// Returns a `ValueError` when `order` does not have exactly three entries or does not name all
/// three columns, when a line does not have exactly three fields, when an `r_exp` value is not
/// an integer, or when a `g_exp`/`coeff` value is not a floating-point value.
pub fn parse_ecp_table(lines: &[String], order: &[&str], split_re: Option<&Regex>) -> Result<ReaderECP, BseError> {
    let split_re = split_re.unwrap_or(&SPACES_RE);
    if order.len() != 3 {
        bse_raise!(ValueError, "ECP table requires exactly 3 columns, got {}", order.len())?
    }
    // Resolve the column positions once. An `order` that does not name all three columns is
    // reported as an error instead of panicking on a missing key while reading the first line.
    let column_of = |key: &str| order.iter().position(|&name| name == key);
    let (r_col, g_col, c_col) = match (column_of("r_exp"), column_of("g_exp"), column_of("coeff")) {
        (Some(r), Some(g), Some(c)) => (r, g, c),
        _ => {
            return bse_raise!(
                ValueError,
                "ECP table column order must be a permutation of 'r_exp', 'g_exp' and 'coeff', got {:?}",
                order
            )
        }
    };

    let mut r_exp: Vec<String> = Vec::new();
    let mut g_exp: Vec<String> = Vec::new();
    let mut coeff: Vec<String> = Vec::new();
    for line in lines {
        let processed_line = replace_d(line);
        let mut parts: Vec<String> =
            split_re.split(processed_line.trim()).filter(|s| !s.is_empty()).map(String::from).collect();
        if parts.len() != 3 {
            bse_raise!(ValueError, "Expected 3 values in ECP table, found {}", parts.len())?
        }
        // The three indices are distinct and below 3, so each field is moved out exactly once.
        r_exp.push(std::mem::take(&mut parts[r_col]));
        g_exp.push(std::mem::take(&mut parts[g_col]));
        coeff.push(std::mem::take(&mut parts[c_col]));
    }

    if !r_exp.iter().all(|x| is_integer(x)) {
        bse_raise!(ValueError, "Non-integer value found in r exponents: {:?}", r_exp)?
    }
    if !g_exp.iter().all(|x| is_floating(x)) {
        bse_raise!(ValueError, "Non-floating-point value found in g exponents: {:?}", g_exp)?
    }
    if !coeff.iter().all(|x| is_floating(x)) {
        bse_raise!(ValueError, "Non-floating-point value found in coefficients: {:?}", coeff)?
    }
    let r_exp: Vec<i32> =
        r_exp.iter().map(|x| x.parse().expect("every r exponent was validated by is_integer")).collect();
    Ok(ReaderECP { r_exp, g_exp, coeff: vec![coeff] })
}
/// Trims every line and removes comment and blank lines according to the flags.
///
/// * `skipchars` – a non-blank line whose first character is contained in this string is dropped.
///   An empty string disables the filter.
/// * `prune_blank` – drop every line that is empty after trimming.
/// * `strip_end_blanks` – drop empty lines at the beginning and end of the result (only relevant
///   when `prune_blank` is `false`; otherwise no empty lines are left anyway).
pub fn prune_lines(lines: &[String], skipchars: &str, prune_blank: bool, strip_end_blanks: bool) -> Vec<String> {
    // Filter on the borrowed, trimmed text so only the surviving lines are allocated.
    let mut processed: Vec<String> = lines
        .iter()
        .map(|line| line.trim())
        .filter(|line| match line.chars().next() {
            None => !prune_blank,
            Some(first) => !skipchars.contains(first),
        })
        .map(String::from)
        .collect();

    if strip_end_blanks && !prune_blank {
        // Cut the trailing blanks first, then remove all leading blanks in one `drain` instead
        // of shifting the whole vector once per blank line.
        let keep_to = processed.iter().rposition(|line| !line.is_empty()).map_or(0, |last| last + 1);
        processed.truncate(keep_to);
        let keep_from = processed.iter().position(|line| !line.is_empty()).unwrap_or(processed.len());
        processed.drain(..keep_from);
    }
    processed
}
pub fn remove_block(
lines: &[String],
start_re: &Regex,
end_re: &Regex,
) -> Result<(Vec<String>, Vec<String>), BseError> {
let mut start_idx = None;
for (idx, line) in lines.iter().enumerate() {
if start_re.is_match(line) {
if start_idx.is_some() {
bse_raise!(ValueError, "Multiple blocks starting with '{}' found", start_re)?
}
start_idx = Some(idx);
break;
}
}
let start_idx = match start_idx {
Some(idx) => idx,
None => return Ok((Vec::new(), lines.to_vec())), };
let mut block_lines = Vec::new();
let mut i = start_idx + 1;
while i < lines.len() && !end_re.is_match(&lines[i]) {
block_lines.push(lines[i].clone());
i += 1;
}
if i == lines.len() {
bse_raise!(ValueError, "Cannot find end of block. Looking for '{end_re}' to close '{start_re}'")?
}
let mut remaining_lines = lines[..start_idx].to_vec();
remaining_lines.extend_from_slice(&lines[i + 1..]);
Ok((block_lines, remaining_lines))
}
#[cfg(test)]
mod tests {
use super::*;
// Playground-style smoke test: runs both regex helpers on one line and prints the results.
// It makes no assertions beyond the `unwrap` calls, so it only fails if the pattern does not
// compile or a helper returns `Err`.
#[test]
fn playground_line_regex() {
// Two named groups (`sym`, `name`) followed by one unnamed group capturing trailing whitespace.
let rex = Regex::new(r"^(?P<sym>[A-Za-z]+)\s+(?P<name>\d+)((?:\s+)+)$").unwrap();
let line = "H 1 ";
// Positional variant: every capture group except group 0.
let res = parse_line_regex(&rex, line, "Test regex parsing").unwrap();
println!("{res:?}");
// Dictionary variant: named capture groups only.
let res = parse_line_regex_dict(&rex, line, "Test regex parsing").unwrap();
println!("{res:?}");
}
}