use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
/// One row of the charge table, deserialized with serde by the CSV reader.
///
/// NOTE(review): the column-to-field mapping relies on the `csv` crate's
/// deserialization rules (presumably a header row naming these fields) —
/// confirm against `data/charges.csv`.
#[derive(Debug, serde::Deserialize)]
struct Record {
/// Charge scheme name; used as the outermost key of every collected table.
scheme: String,
/// Position key within the scheme; may be empty (mapped to `EMPTY` when a
/// static's name is built).
position: String,
/// Residue name; the value `HOH` routes the record to the water table.
residue: String,
/// Atom name; for water records it must be `O`, `H1` or `H2`.
atom: String,
/// Charge value, later emitted into the generated code as an `f32` literal.
charge: f32,
}
/// Charges collected for the three water atoms of one scheme.
///
/// Each field stays `None` until a matching record has been seen; all three
/// must be present by the time the water map is written.
#[derive(Debug, Default)]
struct WaterData {
/// Charge of the `O` atom.
o: Option<f32>,
/// Charge of the `H1` atom.
h1: Option<f32>,
/// Charge of the `H2` atom.
h2: Option<f32>,
}
/// scheme -> position -> residue -> `(atom, charge)` pairs in insertion order.
type AtomData = HashMap<String, HashMap<String, HashMap<String, Vec<(String, f32)>>>>;
/// scheme -> residue -> charge.
type IonData = HashMap<String, HashMap<String, f32>>;
/// scheme -> charges of the water atoms.
type WaterMap = HashMap<String, WaterData>;
/// Returns a copy of `s` in which every `-` becomes `_` and every `+` becomes
/// `_plus`; all other characters are copied unchanged.
fn sanitize(s: &str) -> String {
    let mut ident = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '-' => ident.push('_'),
            '+' => ident.push_str("_plus"),
            other => ident.push(other),
        }
    }
    ident
}
/// Builds the name of the generated static for one `(scheme, pos)` pair.
///
/// Both parts are passed through `sanitize` and upper-cased; an empty `pos`
/// is represented by the literal `EMPTY`. The result has the form
/// `MAP_<SCHEME>_<POS>`.
fn map_name(scheme: &str, pos: &str) -> String {
    let pos_ident = match pos {
        "" => String::from("EMPTY"),
        _ => sanitize(pos).to_uppercase(),
    };
    let scheme_ident = sanitize(scheme).to_uppercase();
    format!("MAP_{}_{}", scheme_ident, pos_ident)
}
/// Accumulates the records read from the charge table and writes the
/// generated Rust source files from them.
struct CodeGenerator {
/// Per-atom charges keyed by scheme, position and residue.
atom_data: AtomData,
/// Single charges keyed by scheme and residue.
ion_data: IonData,
/// Water atom charges keyed by scheme.
water_data: WaterMap,
}
/// Loading of the charge table and emission of the generated source files.
///
/// All emitters walk the collected tables in sorted key order. `HashMap`
/// iteration order is randomized per process, so emitting in iteration order
/// would make the generated files differ from one build to the next.
impl CodeGenerator {
    /// Creates a generator with no records loaded.
    fn new() -> Self {
        Self {
            atom_data: HashMap::new(),
            ion_data: HashMap::new(),
            water_data: HashMap::new(),
        }
    }

    /// Returns the entries of `map` ordered by key.
    fn sorted_entries<V>(map: &HashMap<String, V>) -> Vec<(&String, &V)> {
        let mut entries: Vec<_> = map.iter().collect();
        // Keys of a map are unique, so an unstable sort cannot reorder equal elements.
        entries.sort_unstable_by(|a, b| a.0.cmp(b.0));
        entries
    }

    /// Name of the generated static holding the ion charges of `scheme`.
    fn ion_map_name(scheme: &str) -> String {
        format!("ION_MAP_{}", sanitize(scheme).to_uppercase())
    }

    /// Creates (or truncates) the output file at `path` behind a buffered writer.
    ///
    /// # Panics
    /// Panics if the file cannot be created.
    fn create_output(path: &Path) -> BufWriter<File> {
        let file = File::create(path)
            .unwrap_or_else(|e| panic!("Failed to create {}: {}", path.display(), e));
        BufWriter::new(file)
    }

    /// Flushes the buffered output.
    ///
    /// Dropping a `BufWriter` discards flush errors, which could leave a
    /// truncated generated file unnoticed; flushing explicitly surfaces them.
    ///
    /// # Panics
    /// Panics if the buffered data cannot be written.
    fn finish_output(mut f: BufWriter<File>, path: &Path) {
        f.flush()
            .unwrap_or_else(|e| panic!("Failed to write {}: {}", path.display(), e));
    }

    /// Writes a three-line comment banner with `title` in the middle.
    fn write_banner(f: &mut BufWriter<File>, title: &str) {
        const RULE: &str =
            "// =============================================================================";
        writeln!(f, "{}", RULE).unwrap();
        writeln!(f, "// {}", title).unwrap();
        writeln!(f, "{}", RULE).unwrap();
    }

    /// Reads every record of the CSV file at `path` into the generator.
    ///
    /// Lines starting with `#` are treated as comments and all fields are
    /// trimmed before deserialization.
    ///
    /// # Panics
    /// Panics if the file cannot be opened or a record fails to deserialize.
    fn load(&mut self, path: &Path) {
        let mut rdr = csv::ReaderBuilder::new()
            .comment(Some(b'#'))
            .trim(csv::Trim::All)
            .from_path(path)
            .unwrap_or_else(|e| panic!("Failed to open {}: {}", path.display(), e));
        for result in rdr.deserialize() {
            let record: Record = result.expect("Failed to parse CSV record");
            self.process_record(record);
        }
    }

    /// Routes one record to the water, ion or atom table.
    ///
    /// Residue `HOH` takes precedence; otherwise records of the `classic`
    /// scheme are stored as ions and everything else as per-atom charges.
    fn process_record(&mut self, record: Record) {
        match (record.residue.as_str(), record.scheme.as_str()) {
            ("HOH", _) => self.add_water(&record),
            (_, "classic") => self.add_ion(&record),
            _ => self.add_atom(&record),
        }
    }

    /// Stores the charge of one water atom for the record's scheme.
    ///
    /// # Panics
    /// Panics if the atom name is not `O`, `H1` or `H2`.
    fn add_water(&mut self, record: &Record) {
        let entry = self.water_data.entry(record.scheme.clone()).or_default();
        match record.atom.as_str() {
            "O" => entry.o = Some(record.charge),
            "H1" => entry.h1 = Some(record.charge),
            "H2" => entry.h2 = Some(record.charge),
            other => panic!("Unknown water atom: {}", other),
        }
    }

    /// Stores the charge of an ion, keyed by scheme and residue. A later
    /// record for the same key replaces the earlier value.
    fn add_ion(&mut self, record: &Record) {
        self.ion_data
            .entry(record.scheme.clone())
            .or_default()
            .insert(record.residue.clone(), record.charge);
    }

    /// Appends an `(atom, charge)` pair to the list kept for the record's
    /// scheme, position and residue.
    fn add_atom(&mut self, record: &Record) {
        self.atom_data
            .entry(record.scheme.clone())
            .or_default()
            .entry(record.position.clone())
            .or_default()
            .entry(record.residue.clone())
            .or_default()
            .push((record.atom.clone(), record.charge));
    }

    /// Writes the library source file: all static maps followed by the
    /// public lookup functions.
    ///
    /// # Panics
    /// Panics on any I/O error, or if a water scheme lacks one of its atoms.
    fn generate_lib(&self, path: &Path) {
        let mut f = Self::create_output(path);
        writeln!(f, "// Auto-generated by build.rs - DO NOT EDIT").unwrap();
        writeln!(f).unwrap();
        self.write_atom_maps(&mut f);
        self.write_ion_map(&mut f);
        self.write_water_map(&mut f);
        writeln!(f).unwrap();
        Self::write_banner(&mut f, "Lookup Functions");
        self.write_lookup_fn(&mut f, "get_protein_charge", &["n", "n-", "c", "c+", "m"]);
        self.write_lookup_fn(&mut f, "get_nucleic_charge", &["5", "3", "m"]);
        self.write_ion_lookup_fn(&mut f);
        self.write_water_lookup_fn(&mut f);
        Self::finish_output(f, path);
    }

    /// Writes the test-support source file: the atom maps plus functions that
    /// return a residue's whole atom list.
    ///
    /// # Panics
    /// Panics on any I/O error.
    fn generate_test(&self, path: &Path) {
        let mut f = Self::create_output(path);
        writeln!(f, "// Auto-generated by build.rs - DO NOT EDIT").unwrap();
        writeln!(
            f,
            "// This file provides test utilities for charge validation."
        )
        .unwrap();
        writeln!(f).unwrap();
        self.write_atom_maps(&mut f);
        writeln!(f).unwrap();
        Self::write_banner(&mut f, "Test Utility Functions");
        self.write_atoms_lookup_fn(&mut f, "get_protein_atoms", &["n", "n-", "c", "c+", "m"]);
        self.write_atoms_lookup_fn(&mut f, "get_nucleic_atoms", &["5", "3", "m"]);
        Self::finish_output(f, path);
    }

    /// Emits one static map per `(scheme, position)` pair, in sorted order.
    fn write_atom_maps(&self, f: &mut BufWriter<File>) {
        for (scheme, pos_map) in Self::sorted_entries(&self.atom_data) {
            for (pos, res_map) in Self::sorted_entries(pos_map) {
                let name = map_name(scheme, pos);
                self.write_atom_phf_map(f, &name, res_map);
            }
        }
    }

    /// Emits the static `name`, mapping each residue to its slice of
    /// `(atom, charge)` pairs. Residues are emitted in sorted order; atoms
    /// keep the order in which they were recorded.
    fn write_atom_phf_map(
        &self,
        f: &mut BufWriter<File>,
        name: &str,
        res_map: &HashMap<String, Vec<(String, f32)>>,
    ) {
        // The rendered values are collected first because the map builder
        // borrows the strings it is given.
        let entries: Vec<(&String, String)> = Self::sorted_entries(res_map)
            .into_iter()
            .map(|(res, atoms)| {
                let atoms_str = atoms
                    .iter()
                    .map(|(a, c)| format!("(\"{}\", {}_f32)", a, c))
                    .collect::<Vec<_>>()
                    .join(", ");
                (res, format!("&[{}]", atoms_str))
            })
            .collect();
        let mut phf = phf_codegen::Map::new();
        for (res, val) in &entries {
            phf.entry(res.as_str(), val.as_str());
        }
        writeln!(
            f,
            "static {}: phf::Map<&'static str, &'static [(&'static str, f32)]> = {};",
            name,
            phf.build()
        )
        .unwrap();
    }

    /// Emits one static per ion scheme, mapping residue to charge.
    fn write_ion_map(&self, f: &mut BufWriter<File>) {
        for (scheme, res_map) in Self::sorted_entries(&self.ion_data) {
            let name = Self::ion_map_name(scheme);
            let entries: Vec<(&String, String)> = Self::sorted_entries(res_map)
                .into_iter()
                .map(|(res, charge)| (res, format!("{}_f32", charge)))
                .collect();
            let mut phf = phf_codegen::Map::new();
            for (res, val) in &entries {
                phf.entry(res.as_str(), val.as_str());
            }
            writeln!(
                f,
                "static {}: phf::Map<&'static str, f32> = {};",
                name,
                phf.build()
            )
            .unwrap();
        }
    }

    /// Emits the `WATER_CHARGES` static, mapping scheme to its water charges.
    ///
    /// # Panics
    /// Panics if any scheme is missing the charge of `O`, `H1` or `H2`.
    fn write_water_map(&self, f: &mut BufWriter<File>) {
        let entries: Vec<(&String, String)> = Self::sorted_entries(&self.water_data)
            .into_iter()
            .map(|(scheme, data)| {
                let val = format!(
                    "crate::WaterCharges {{ o: {}_f32, h1: {}_f32, h2: {}_f32 }}",
                    data.o.expect("Missing O"),
                    data.h1.expect("Missing H1"),
                    data.h2.expect("Missing H2")
                );
                (scheme, val)
            })
            .collect();
        let mut phf = phf_codegen::Map::new();
        for (scheme, val) in &entries {
            phf.entry(scheme.as_str(), val.as_str());
        }
        writeln!(
            f,
            "static WATER_CHARGES: phf::Map<&'static str, crate::WaterCharges> = {};",
            phf.build()
        )
        .unwrap();
    }

    /// Emits `pub fn <fn_name>(scheme, pos, res, atom) -> Option<f32>`, which
    /// selects the residue's atom list by scheme and position and then
    /// searches it linearly for `atom`.
    ///
    /// Only positions listed in `positions` get a match arm.
    fn write_lookup_fn(&self, f: &mut BufWriter<File>, fn_name: &str, positions: &[&str]) {
        let arms = self.build_scheme_match_arms(positions, |name| format!("{}.get(res),", name));
        writeln!(f).unwrap();
        writeln!(f, "#[inline(always)]").unwrap();
        writeln!(
            f,
            "pub fn {}(scheme: &str, pos: &str, res: &str, atom: &str) -> Option<f32> {{",
            fn_name
        )
        .unwrap();
        writeln!(f, " let atoms = match scheme {{").unwrap();
        for arm in &arms {
            writeln!(f, " {}", arm).unwrap();
        }
        writeln!(f, " _ => None,").unwrap();
        writeln!(f, " }}?;").unwrap();
        writeln!(f).unwrap();
        writeln!(
            f,
            " atoms.iter().find(|(a, _)| *a == atom).map(|(_, c)| *c)"
        )
        .unwrap();
        writeln!(f, "}}").unwrap();
    }

    /// Emits `pub fn <fn_name>(scheme, pos, res)`, which returns the whole
    /// `(atom, charge)` slice of a residue, or `None` if it is unknown.
    ///
    /// Only positions listed in `positions` get a match arm.
    fn write_atoms_lookup_fn(&self, f: &mut BufWriter<File>, fn_name: &str, positions: &[&str]) {
        let arms =
            self.build_scheme_match_arms(positions, |name| format!("{}.get(res).copied(),", name));
        writeln!(f).unwrap();
        writeln!(f, "#[inline(always)]").unwrap();
        writeln!(
            f,
            "pub fn {}(scheme: &str, pos: &str, res: &str) -> Option<&'static [(&'static str, f32)]> {{",
            fn_name
        )
        .unwrap();
        writeln!(f, " match scheme {{").unwrap();
        for arm in &arms {
            writeln!(f, " {}", arm).unwrap();
        }
        writeln!(f, " _ => None,").unwrap();
        writeln!(f, " }}").unwrap();
        writeln!(f, "}}").unwrap();
    }

    /// Builds one `match scheme` arm per scheme that has at least one of the
    /// requested `positions`. Each arm contains a nested `match pos` whose
    /// arms are produced by `map_access` from the name of the matching static.
    ///
    /// Schemes and positions are emitted in sorted order.
    fn build_scheme_match_arms<F>(&self, positions: &[&str], map_access: F) -> Vec<String>
    where
        F: Fn(&str) -> String,
    {
        let mut arms = Vec::new();
        for (scheme, pos_map) in Self::sorted_entries(&self.atom_data) {
            let mut wanted: Vec<&String> = pos_map
                .keys()
                .filter(|p| positions.contains(&p.as_str()))
                .collect();
            wanted.sort_unstable();
            // Schemes with none of the requested positions get no arm at all.
            if wanted.is_empty() {
                continue;
            }
            let pos_arms: Vec<String> = wanted
                .into_iter()
                .map(|pos| {
                    let name = map_name(scheme, pos);
                    format!("\"{}\" => {}", pos, map_access(&name))
                })
                .collect();
            let pos_match = format!(
                "match pos {{\n {}\n _ => None,\n }}",
                pos_arms.join("\n ")
            );
            arms.push(format!(
                "\"{}\" => {{\n {}\n }}",
                scheme, pos_match
            ));
        }
        arms
    }

    /// Emits `pub fn get_ion_charge(scheme, res) -> Option<f32>`, dispatching
    /// on the scheme to the matching ion static.
    fn write_ion_lookup_fn(&self, f: &mut BufWriter<File>) {
        let arms: Vec<String> = Self::sorted_entries(&self.ion_data)
            .into_iter()
            .map(|(scheme, _)| {
                let name = Self::ion_map_name(scheme);
                format!("\"{}\" => {}.get(res).copied(),", scheme, name)
            })
            .collect();
        writeln!(f).unwrap();
        writeln!(f, "#[inline(always)]").unwrap();
        writeln!(
            f,
            "pub fn get_ion_charge(scheme: &str, res: &str) -> Option<f32> {{"
        )
        .unwrap();
        writeln!(f, " match scheme {{").unwrap();
        for arm in &arms {
            writeln!(f, " {}", arm).unwrap();
        }
        writeln!(f, " _ => None,").unwrap();
        writeln!(f, " }}").unwrap();
        writeln!(f, "}}").unwrap();
    }

    /// Emits `pub fn get_water_charges(scheme)`, a lookup in `WATER_CHARGES`.
    fn write_water_lookup_fn(&self, f: &mut BufWriter<File>) {
        writeln!(f).unwrap();
        writeln!(f, "#[inline(always)]").unwrap();
        writeln!(
            f,
            "pub fn get_water_charges(scheme: &str) -> Option<crate::WaterCharges> {{"
        )
        .unwrap();
        writeln!(f, " WATER_CHARGES.get(scheme).copied()").unwrap();
        writeln!(f, "}}").unwrap();
    }
}
/// Entry point: reads `data/charges.csv` and writes `codegen.rs` and
/// `codegen_test.rs` into the directory named by the `OUT_DIR` environment
/// variable.
///
/// # Panics
/// Panics if `OUT_DIR` is not set, or if loading or generation fails.
fn main() {
    let csv_path = Path::new("data/charges.csv");
    // Ask Cargo to re-run this program whenever the charge table changes.
    println!("cargo:rerun-if-changed={}", csv_path.display());

    // `var_os` keeps the path as an `OsString`, so an output directory whose
    // name is not valid UTF-8 does not abort the build.
    let out_dir = env::var_os("OUT_DIR")
        .expect("OUT_DIR is not set; this program is expected to be run by Cargo");
    let out_path = Path::new(&out_dir);

    let mut generator = CodeGenerator::new();
    generator.load(csv_path);
    generator.generate_lib(&out_path.join("codegen.rs"));
    generator.generate_test(&out_path.join("codegen_test.rs"));
}