import math
import os
import re
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Species suffixes that split_func_species() looks for at the end of a test
# file stem.  They are tried in list order and the first match wins, so the
# longer names are kept ahead of the shorter ones they contain.
SPECIES_LIST = [
    "BrOH_cation_restr", "BrOH_cation", "BrOH",
    "H_restr", "Li_restr",
    "H", "Li",
]
def parse_ref_values(source: str) -> Dict[str, List[float]]:
result = {}
func_pattern = re.compile(r"def test_(\w+)_(\d+)_(\w+)\(\):")
ref_pattern = re.compile(
r"ref_tgt\s*=\s*(?:ns\.full_like\([^,]+,\s*)?ns\.asarray\(\[([^\]]+)\]\)",
)
lines = source.split("\n")
current_key = None
for line in lines:
fm = func_pattern.match(line.strip())
if fm:
current_key = fm.group(3) continue
rm = ref_pattern.search(line)
if rm and current_key:
raw = rm.group(1)
values = [float(v.strip()) for v in raw.split(",") if v.strip()]
result[current_key] = values
current_key = None
return result
def split_func_species(stem: str) -> Optional[Tuple[str, str]]:
    """Split a test file stem into its functional name and species.

    The stem must look like ``test_<func>_<species>`` where ``<species>`` is
    one of the entries of ``SPECIES_LIST``.  Entries are tried in list order
    and the first one that matches with a non-empty ``<func>`` is used.

    Args:
        stem: File name without extension.

    Returns:
        ``(func, species)``, or ``None`` when the stem lacks the ``test_``
        prefix or no species yields a non-empty functional name.
    """
    prefix = "test_"
    if not stem.startswith(prefix):
        return None
    body = stem[len(prefix):]
    for species in SPECIES_LIST:
        suffix = "_" + species
        if not body.endswith(suffix):
            continue
        func = body[: len(body) - len(suffix)]
        if func:
            return func, species
    return None
def split_category_xcname(func: str) -> Tuple[str, str]:
    """Split a functional identifier into a category and a functional name.

    For identifiers starting with one of the known family prefixes
    (``hyb_lda_``, ``hyb_gga_``, ``hyb_mgga_``, ``lda_``, ``gga_``, ``mgga_``)
    the category is the family plus the next underscore-separated word and
    the name is everything after it, e.g. ``gga_x_pbe`` -> ``("gga_x", "pbe")``.
    If nothing follows that word, the category is the bare family and the
    word itself becomes the name, e.g. ``lda_x`` -> ``("lda", "x")``.

    Identifiers without a known prefix are split at the first underscore;
    when there is no underscore the identifier is returned as both parts.

    Args:
        func: Functional identifier such as ``hyb_gga_xc_b3lyp``.

    Returns:
        ``(category, xc_name)``.
    """
    # No identifier can start with two of these prefixes, so their relative
    # order does not affect the result.
    family_prefixes = (
        "hyb_lda_", "hyb_gga_", "hyb_mgga_",
        "lda_", "gga_", "mgga_",
    )
    for prefix in family_prefixes:
        if not func.startswith(prefix):
            continue
        family = prefix.rstrip("_")
        kind, sep, name = func[len(prefix):].partition("_")
        if sep:
            return family + "_" + kind, name
        # Nothing after the first word: it is the name, not a sub-category.
        return family, kind

    head, sep, tail = func.partition("_")
    return head, tail if sep else head
def write_toml(
    data: Dict[str, Dict[str, Dict[str, Dict[str, List[float]]]]],
    path: Path,
) -> None:
    """Write the nested reference data to *path* as TOML tables.

    One ``[category.xc_name.species]`` table is emitted per innermost
    mapping, visiting every nesting level in sorted key order.  Inside a
    table only the keys ``zk``, ``vrho``, ``vsigma``, ``vtau`` and ``vlapl``
    are written, in that fixed order and only when present; any other key is
    ignored.  Values are formatted with ``.15e`` and each table is followed
    by a blank line.

    Args:
        data: ``category -> xc_name -> species -> key -> values``.
        path: Destination file; it is overwritten.
    """
    output_keys = ("zk", "vrho", "vsigma", "vtau", "vlapl")
    rows: List[str] = []
    for category in sorted(data):
        by_xc = data[category]
        for xc_name in sorted(by_xc):
            by_species = by_xc[xc_name]
            for species in sorted(by_species):
                entries = by_species[species]
                rows.append(f"[{category}.{xc_name}.{species}]")
                for key in output_keys:
                    if key not in entries:
                        continue
                    formatted = ", ".join(f"{v:.15e}" for v in entries[key])
                    rows.append(f"{key} = [{formatted}]")
                # Blank separator line after every table.
                rows.append("")
    path.write_text("\n".join(rows))
def main() -> None:
    """Collect reference values from the regression tests and write them as TOML.

    Command line: ``[regression_dir [output_path]]``.  Without the first
    argument the directory defaults to
    ``$LIBXC_REPO_PATH/testsuite/regression``; without the second the output
    is ``reference.toml`` next to this script.

    Every ``test_*.py`` file in each immediate sub-directory of the
    regression directory is parsed.  Entries whose values contain a NaN are
    counted and left out of the output.

    The process exits with a message when no regression directory is
    configured or when the configured path is not a directory.
    """
    script_dir = Path(__file__).resolve().parent
    default_output = script_dir / "reference.toml"

    if len(sys.argv) > 1:
        regression_dir = Path(sys.argv[1])
    else:
        libxc_repo = os.environ.get("LIBXC_REPO_PATH")
        if not libxc_repo:
            sys.exit("Set LIBXC_REPO_PATH or pass the regression dir as argument")
        regression_dir = Path(libxc_repo) / "testsuite/regression"
    # Fail with a readable message instead of a traceback from iterdir().
    if not regression_dir.is_dir():
        sys.exit(f"Regression directory not found: {regression_dir}")

    output_path = Path(sys.argv[2]) if len(sys.argv) > 2 else default_output

    # category -> xc_name -> species -> key -> values
    ref_data: Dict[str, Dict[str, Dict[str, Dict[str, List[float]]]]] = defaultdict(
        lambda: defaultdict(lambda: defaultdict(dict))
    )
    parsed = 0
    nans_skipped = 0
    for category_dir in sorted(regression_dir.iterdir()):
        if not category_dir.is_dir():
            continue
        for test_file in sorted(category_dir.glob("test_*.py")):
            split = split_func_species(test_file.stem)
            if split is None:
                continue
            func, species = split
            # Read as UTF-8 rather than the locale encoding so the result
            # does not depend on the platform.
            ref_values = parse_ref_values(test_file.read_text(encoding="utf-8"))
            if not ref_values:
                continue
            cat, xc_name = split_category_xcname(func)
            for key, values in ref_values.items():
                if any(math.isnan(v) for v in values):
                    nans_skipped += 1
                    continue
                ref_data[cat][xc_name][species][key] = values
                parsed += 1

    print(f"Parsed {parsed} reference entries")
    if nans_skipped:
        print(f"Skipped {nans_skipped} entries with NaN values")
    write_toml(ref_data, output_path)
    print(f"Wrote {output_path}")
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()