use csv::ReaderBuilder;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::Write;
use std::path::Path;
/// Residues for which code is generated, one tuple per residue:
/// `(csv_name, rust_name, n_chi, n_rotamers)`.
///
/// * `csv_name` — residue name looked up in the first CSV column; also used as the
///   prefix of the generated `*_TABLE` / `*_KEYS` statics and as the `NAME` constant.
/// * `rust_name` — name of the type under `crate::residue` that receives the
///   generated `Residue` impl, and the first argument passed to the
///   `for_all_residues!` callback.
/// * `n_chi` — number of chi angles validated and emitted per rotamer.
/// * `n_rotamers` — number of rotamers every (φ, ψ) grid cell is required to contain.
const RESIDUES: &[(&str, &str, usize, usize)] = &[
("ARG", "Arg", 4, 75),
("ASN", "Asn", 2, 36),
("ASP", "Asp", 2, 18),
("CPR", "Cpr", 3, 2),
("CYD", "Cyd", 1, 3),
("CYH", "Cyh", 1, 3),
("CYS", "Cys", 1, 3),
("GLN", "Gln", 3, 108),
("GLU", "Glu", 3, 54),
("HIS", "His", 2, 36),
("ILE", "Ile", 2, 9),
("LEU", "Leu", 2, 9),
("LYS", "Lys", 4, 73),
("MET", "Met", 3, 27),
("PHE", "Phe", 2, 18),
("PRO", "Pro", 3, 2),
("SER", "Ser", 1, 3),
("THR", "Thr", 1, 3),
("TPR", "Tpr", 3, 2),
("TRP", "Trp", 2, 36),
("TYR", "Tyr", 2, 18),
("VAL", "Val", 1, 3),
];
/// Number of grid points along each of the φ and ψ axes; also the outer dimensions
/// of every generated table.
const GRID_COUNT: usize = 37;
/// Angle, in degrees, that maps to grid index 0.
const GRID_MIN: f32 = -180.0;
/// Spacing between neighbouring grid points, in degrees
/// (`GRID_MIN + (GRID_COUNT - 1) * GRID_STEP` is 180°).
const GRID_STEP: f32 = 10.0;
/// Multiplier that converts degrees to radians; applied to chi values before
/// taking their sine and cosine.
const DEG_TO_RAD: f64 = std::f64::consts::PI / 180.0;
/// One parsed CSV row: a single rotamer of one residue at one (φ, ψ) grid point.
///
/// All arrays hold four slots; only the first `n_chi` of them are validated and
/// emitted for a given residue.
#[derive(Debug, Clone)]
struct RawRow {
// Rotamer bin indices (CSV columns 3-6); used as the sort key within a cell and
// emitted as the rotamer key.
r: [u8; 4],
// Rotamer probability (CSV column 7); the probabilities of a cell must sum to ~1.
prob: f32,
// Chi angle values (CSV columns 8-11), in degrees; emitted as sine/cosine pairs.
chi_val: [f32; 4],
// Chi angle sigmas (CSV columns 12-15); emitted unchanged as `chi_sigma`.
chi_sig: [f32; 4],
}
/// Build-script entry point: turns the rotamer-library CSV into Rust source.
///
/// Steps:
/// 1. Load `data/dunbrack-2010.lib.csv` and bucket every row by residue name and
///    (φ, ψ) grid cell.
/// 2. For each entry of `RESIDUES`, validate the grid: rotamer count, bin indices,
///    sigmas, probability normalisation, matching ±180° edges, and an identical
///    key set in every cell.
/// 3. Emit the key array, the lookup table and the `Residue` impl for that residue,
///    and finally the `for_all_residues!` macro, into `$OUT_DIR/tables.rs`.
///
/// # Panics
///
/// Every I/O, parse or validation failure panics with a `build.rs: …` message,
/// which aborts the build.
fn main() {
    println!("cargo::rerun-if-changed=data/dunbrack-2010.lib.csv");
    let csv_path = Path::new("data/dunbrack-2010.lib.csv");
    assert!(
        csv_path.exists(),
        "build.rs: CSV file not found at {csv_path:?}"
    );
    let tables = load_tables(csv_path);

    let out_dir = env::var("OUT_DIR")
        .unwrap_or_else(|e| panic!("build.rs: OUT_DIR is not available: {e}"));
    let dest_path = Path::new(&out_dir).join("tables.rs");
    let mut out = fs::File::create(&dest_path)
        .unwrap_or_else(|e| panic!("build.rs: cannot create {dest_path:?}: {e}"));
    writeln!(out, "// Auto-generated by build.rs — do not edit.").unwrap();
    writeln!(out).unwrap();

    for &(csv_name, rust_name, n_chi, n_rotamers) in RESIDUES {
        let grid = tables
            .get(csv_name)
            .unwrap_or_else(|| panic!("build.rs: residue '{csv_name}' not found in CSV"));
        let table_name = format!("{csv_name}_TABLE");
        let keys_name = format!("{csv_name}_KEYS");

        validate_cells(csv_name, n_chi, n_rotamers, grid);
        // Sort once; the edge checks, the key-set check and the emitters all
        // work on the same sorted view.
        let sorted_grid = sort_cells(grid);
        check_periodic_edges(csv_name, &sorted_grid);
        let canonical_keys = check_key_sets(csv_name, &sorted_grid);

        emit_keys(&mut out, &keys_name, n_chi, &canonical_keys);
        emit_table(&mut out, &table_name, n_chi, n_rotamers, &sorted_grid);
        emit_impl(
            &mut out,
            rust_name,
            csv_name,
            &table_name,
            &keys_name,
            n_chi,
            n_rotamers,
        );
    }
    emit_for_all_residues_macro(&mut out);
}

/// Reads the CSV at `csv_path` and groups its rows into one
/// `GRID_COUNT` × `GRID_COUNT` grid of cells per residue name.
///
/// Column layout: 0 = residue, 1 = φ, 2 = ψ, 3–6 = bin indices, 7 = probability,
/// 8–11 = chi values, 12–15 = chi sigmas.
fn load_tables(csv_path: &Path) -> HashMap<String, Vec<Vec<Vec<RawRow>>>> {
    let mut tables: HashMap<String, Vec<Vec<Vec<RawRow>>>> = HashMap::new();
    let mut reader = ReaderBuilder::new()
        .has_headers(true)
        .from_path(csv_path)
        .unwrap_or_else(|e| panic!("build.rs: cannot open CSV: {e}"));
    for (idx, result) in reader.records().enumerate() {
        // 1-based data-row number; the header line is not counted.
        let row_no = idx + 1;
        let record = result.unwrap_or_else(|e| panic!("build.rs: malformed CSV row: {e}"));
        let res = record
            .get(0)
            .unwrap_or_else(|| panic!("build.rs: CSV data row {row_no}: missing column 0"));
        let phi: f32 = parse_field(record.get(1), row_no, 1);
        let psi: f32 = parse_field(record.get(2), row_no, 2);
        let r: [u8; 4] =
            std::array::from_fn(|k| parse_field(record.get(3 + k), row_no, 3 + k));
        let prob: f32 = parse_field(record.get(7), row_no, 7);
        let chi_val: [f32; 4] =
            std::array::from_fn(|k| parse_field(record.get(8 + k), row_no, 8 + k));
        let chi_sig: [f32; 4] =
            std::array::from_fn(|k| parse_field(record.get(12 + k), row_no, 12 + k));
        let phi_idx = angle_to_index(phi);
        let psi_idx = angle_to_index(psi);
        let grid = tables
            .entry(res.to_string())
            .or_insert_with(|| vec![vec![Vec::new(); GRID_COUNT]; GRID_COUNT]);
        grid[phi_idx][psi_idx].push(RawRow {
            r,
            prob,
            chi_val,
            chi_sig,
        });
    }
    tables
}

/// Parses one CSV field, panicking with the row and column on a missing or
/// unparsable value.
fn parse_field<T>(raw: Option<&str>, row_no: usize, col: usize) -> T
where
    T: std::str::FromStr,
    T::Err: std::fmt::Display,
{
    let text = raw
        .unwrap_or_else(|| panic!("build.rs: CSV data row {row_no}: missing column {col}"));
    text.parse().unwrap_or_else(|e| {
        panic!("build.rs: CSV data row {row_no}, column {col}: cannot parse {text:?}: {e}")
    })
}

/// Checks every cell of `grid` for the expected rotamer count, non-negative
/// probabilities summing to ~1, and non-zero bin indices / positive sigmas for
/// the first `n_chi` chi angles.
fn validate_cells(csv_name: &str, n_chi: usize, n_rotamers: usize, grid: &[Vec<Vec<RawRow>>]) {
    for (phi_idx, phi_row) in grid.iter().enumerate() {
        for (psi_idx, cell) in phi_row.iter().enumerate() {
            assert!(
                cell.len() == n_rotamers,
                "build.rs: {csv_name} cell ({phi_idx},{psi_idx}) has {} rotamers, expected {n_rotamers}",
                cell.len()
            );
            for row in cell {
                assert!(
                    row.prob >= 0.0,
                    "build.rs: {csv_name} cell ({phi_idx},{psi_idx}) has negative prob={v}",
                    v = row.prob
                );
                let used = row.r.iter().zip(&row.chi_sig).take(n_chi);
                for (i, (&bin, &sigma)) in used.enumerate() {
                    let j = i + 1;
                    assert!(
                        bin > 0,
                        "build.rs: {csv_name} cell ({phi_idx},{psi_idx}) has r{j}=0"
                    );
                    assert!(
                        sigma > 0.0,
                        "build.rs: {csv_name} cell ({phi_idx},{psi_idx}) has chi_sigma[{j}]={sigma}"
                    );
                }
            }
            let prob_sum: f32 = cell.iter().map(|row| row.prob).sum();
            assert!(
                (0.99..=1.01).contains(&prob_sum),
                "build.rs: {csv_name} cell ({phi_idx},{psi_idx}) prob sum = {prob_sum:.6}"
            );
        }
    }
}

/// Returns a view of `grid` in which the rows of every cell are sorted by their
/// bin-index key `r` (stable sort, so equal keys keep their CSV order).
fn sort_cells(grid: &[Vec<Vec<RawRow>>]) -> Vec<Vec<Vec<&RawRow>>> {
    grid.iter()
        .map(|phi_row| {
            phi_row
                .iter()
                .map(|cell| {
                    let mut sorted: Vec<&RawRow> = cell.iter().collect();
                    sorted.sort_by_key(|row| row.r);
                    sorted
                })
                .collect()
        })
        .collect()
}

/// Asserts that the first and last grid lines hold identical data along both
/// axes, i.e. that the -180° and +180° edges agree.
fn check_periodic_edges(csv_name: &str, grid: &[Vec<Vec<&RawRow>>]) {
    let last = GRID_COUNT - 1;
    for (psi_idx, (lo, hi)) in grid[0].iter().zip(&grid[last]).enumerate() {
        let location = format!(" at ψ idx {psi_idx}");
        assert_cells_match(csv_name, "φ=-180/φ=180", &location, lo, hi);
    }
    for (phi_idx, phi_row) in grid.iter().enumerate() {
        let location = format!(" at φ idx {phi_idx}");
        assert_cells_match(
            csv_name,
            "ψ=-180/ψ=180",
            &location,
            &phi_row[0],
            &phi_row[last],
        );
    }
}

/// Asserts that two key-sorted cells contain exactly the same rotamers
/// (exact floating-point equality is intentional).
fn assert_cells_match(
    csv_name: &str,
    edge: &str,
    location: &str,
    lo: &[&RawRow],
    hi: &[&RawRow],
) {
    assert_eq!(
        lo.len(),
        hi.len(),
        "build.rs: {csv_name} {edge} rotamer count mismatch{location}"
    );
    for (a, b) in lo.iter().zip(hi) {
        assert!(
            a.r == b.r && a.prob == b.prob && a.chi_val == b.chi_val && a.chi_sig == b.chi_sig,
            "build.rs: {csv_name} {edge} data mismatch{location}"
        );
    }
}

/// Asserts that every cell has the same sorted key sequence as cell (0,0) and
/// returns that sequence.
fn check_key_sets(csv_name: &str, grid: &[Vec<Vec<&RawRow>>]) -> Vec<[u8; 4]> {
    let canonical_keys: Vec<[u8; 4]> = grid[0][0].iter().map(|row| row.r).collect();
    for (phi_idx, phi_row) in grid.iter().enumerate() {
        for (psi_idx, cell) in phi_row.iter().enumerate() {
            assert!(
                cell.iter()
                    .map(|row| row.r)
                    .eq(canonical_keys.iter().copied()),
                "build.rs: {csv_name} cell ({phi_idx},{psi_idx}) has different bin index key set than cell (0,0)"
            );
        }
    }
    canonical_keys
}
/// Maps a grid angle in degrees to its index along a φ/ψ axis.
///
/// The angle is snapped to the nearest grid point: index 0 corresponds to
/// `GRID_MIN` and consecutive indices are `GRID_STEP` degrees apart.
///
/// # Panics
///
/// Panics if the snapped position lies outside `0..GRID_COUNT` or if `deg` is NaN.
fn angle_to_index(deg: f32) -> usize {
    let pos = ((deg - GRID_MIN) / GRID_STEP).round();
    // Range-check in floating point *before* casting: `as usize` saturates, so a
    // negative position (or NaN) would otherwise silently turn into index 0.
    assert!(
        (0.0..GRID_COUNT as f32).contains(&pos),
        "build.rs: angle {deg}° out of grid range"
    );
    pos as usize
}
/// Writes the `static {keys_name}` array of rotamer keys to `out`.
///
/// Each key contributes one `[a, b, …],` line holding its first `n_chi` bin
/// indices; the declared outer length is the number of keys supplied. The item is
/// followed by one blank line.
///
/// # Panics
///
/// Panics if `n_chi` exceeds 4 or if writing fails.
fn emit_keys(out: &mut fs::File, keys_name: &str, n_chi: usize, canonical_keys: &[[u8; 4]]) {
    let key_count = canonical_keys.len();
    let header = format!("static {keys_name}: [[u8; {n_chi}]; {key_count}] = [");
    writeln!(out, "{header}").unwrap();
    for bins in canonical_keys {
        let used = &bins[..n_chi];
        let joined = used.iter().map(u8::to_string).collect::<Vec<_>>().join(", ");
        writeln!(out, "[{joined}],").unwrap();
    }
    // Closing bracket plus the blank separator line.
    out.write_all(b"];\n\n").unwrap();
}
/// Writes the `static {table_name}` lookup table to `out`.
///
/// The table is declared as a `GRID_COUNT` × `GRID_COUNT` array of
/// `[crate::interp::GridEntry<n_chi>; n_rotamers]` cells. `grid` is walked in
/// index order (outer axis first) and every row is written through
/// `emit_grid_entry`. The item is followed by one blank line.
///
/// # Panics
///
/// Panics if writing fails.
fn emit_table(
    out: &mut fs::File,
    table_name: &str,
    n_chi: usize,
    n_rotamers: usize,
    grid: &[Vec<Vec<&RawRow>>],
) {
    let entry_ty = format!("crate::interp::GridEntry<{n_chi}>");
    let header = format!(
        "static {table_name}: [[[{entry_ty}; {n_rotamers}]; {GRID_COUNT}]; {GRID_COUNT}] = ["
    );
    writeln!(out, "{header}").unwrap();
    for psi_cells in grid {
        out.write_all(b"[\n").unwrap();
        for rotamers in psi_cells {
            out.write_all(b"[\n").unwrap();
            for &entry in rotamers {
                emit_grid_entry(out, entry, n_chi);
            }
            out.write_all(b"],\n").unwrap();
        }
        out.write_all(b"],\n").unwrap();
    }
    // Closing bracket plus the blank separator line.
    out.write_all(b"];\n\n").unwrap();
}
/// Writes one `crate::interp::GridEntry { … },` line for `row` to `out`.
///
/// Only the first `n_chi` chi angles are emitted. Each `chi_val` is widened to
/// `f64`, converted from degrees to radians and stored as its sine and cosine
/// (narrowed back to `f32`); `prob` and `chi_sig` are emitted as parsed.
///
/// The line is assembled in memory and handed to the file in a single
/// `write_all`: `out` is an unbuffered `File`, and this function runs once per
/// rotamer per grid cell, so piecewise `write!` calls would cost one system call
/// per fragment.
///
/// # Panics
///
/// Panics if `n_chi` exceeds 4 or if writing fails.
fn emit_grid_entry(out: &mut fs::File, row: &RawRow, n_chi: usize) {
    /// Renders values as `_f32`-suffixed literals separated by `", "`.
    fn literal_list(values: impl Iterator<Item = f32>) -> String {
        values
            .map(|v| format!("{}_f32", format_f32(v)))
            .collect::<Vec<_>>()
            .join(", ")
    }

    let radians = row.chi_val[..n_chi]
        .iter()
        .map(|&deg| f64::from(deg) * DEG_TO_RAD);
    let sines = literal_list(radians.clone().map(|rad| rad.sin() as f32));
    let cosines = literal_list(radians.map(|rad| rad.cos() as f32));
    let sigmas = literal_list(row.chi_sig[..n_chi].iter().copied());
    let prob = format_f32(row.prob);

    let line = format!(
        "crate::interp::GridEntry {{ prob: {prob}_f32, chi_sin: [{sines}], chi_cos: [{cosines}], chi_sigma: [{sigmas}] }},\n"
    );
    out.write_all(line.as_bytes()).unwrap();
}
/// Formats `v` as the digits of a Rust float literal; callers append the `_f32`
/// suffix themselves.
///
/// The `Debug` representation is used because it always yields a token that is a
/// valid float literal for finite values (for example `1.0` rather than `1`).
///
/// # Panics
///
/// Panics if `v` is NaN or infinite: those format as `NaN` / `inf`, which would
/// produce generated code that does not compile.
fn format_f32(v: f32) -> String {
    assert!(
        v.is_finite(),
        "build.rs: cannot emit non-finite value {v} as an f32 literal"
    );
    format!("{v:?}")
}
/// Writes the `Sealed` and `Residue` trait impls for `crate::residue::{rust_name}`.
///
/// The `Residue` impl carries the `N_CHI`, `N_ROTAMERS` and `NAME` constants
/// (`NAME` is `csv_name`), the `Rot` / `Iter` associated types, and a `rotamers`
/// function that forwards to `crate::interp::build_iter` with the residue's table
/// and key statics. The impls are followed by one blank line.
///
/// # Panics
///
/// Panics if writing fails.
fn emit_impl(
    out: &mut fs::File,
    rust_name: &str,
    csv_name: &str,
    table_name: &str,
    keys_name: &str,
    n_chi: usize,
    n_rotamers: usize,
) {
    let self_ty = format!("crate::residue::{rust_name}");
    let iter_ty = format!("crate::interp::RotamerIter<{n_chi}, {n_rotamers}>");
    let lines = [
        format!("impl crate::sealed::Sealed for {self_ty} {{}}"),
        format!("impl crate::residue::Residue for {self_ty} {{"),
        format!(" const N_CHI: usize = {n_chi};"),
        format!(" const N_ROTAMERS: usize = {n_rotamers};"),
        format!(" const NAME: &'static str = \"{csv_name}\";"),
        format!(" type Rot = crate::rotamer::Rotamer<{n_chi}>;"),
        format!(" type Iter = {iter_ty};"),
        String::from(" #[inline]"),
        format!(" fn rotamers(phi: f32, psi: f32) -> {iter_ty} {{"),
        format!(" crate::interp::build_iter(&{table_name}, &{keys_name}, phi, psi)"),
        String::from(" }"),
        String::from("}"),
        // Blank separator line after the impl.
        String::new(),
    ];
    for line in &lines {
        writeln!(out, "{line}").unwrap();
    }
}
fn emit_for_all_residues_macro(out: &mut fs::File) {
writeln!(
out,
"/// Invokes `$callback!(Type, N_CHI, N_ROTAMERS)` for all 22 residue types."
)
.unwrap();
writeln!(out, "///").unwrap();
writeln!(out, "/// # Examples").unwrap();
writeln!(out, "///").unwrap();
writeln!(out, "/// ```").unwrap();
writeln!(out, "/// # use dunbrack::*;").unwrap();
writeln!(out, "/// use dunbrack::for_all_residues;").unwrap();
writeln!(out, "///").unwrap();
writeln!(out, "/// macro_rules! check {{").unwrap();
writeln!(out, "/// ($Res:ident, $_n:literal, $_r:literal) => {{").unwrap();
writeln!(
out,
"/// assert!(<$Res as Residue>::N_ROTAMERS > 0);"
)
.unwrap();
writeln!(out, "/// }};").unwrap();
writeln!(out, "/// }}").unwrap();
writeln!(out, "/// for_all_residues!(check);").unwrap();
writeln!(out, "/// ```").unwrap();
writeln!(out, "#[macro_export]").unwrap();
writeln!(out, "macro_rules! for_all_residues {{").unwrap();
writeln!(out, " ($callback:ident) => {{").unwrap();
for &(_, rust_name, n_chi, n_rotamers) in RESIDUES {
writeln!(
out,
" $callback!({rust_name}, {n_chi}, {n_rotamers});"
)
.unwrap();
}
writeln!(out, " }};").unwrap();
writeln!(out, "}}").unwrap();
}