use rayon::prelude::*;
#[derive(Clone, Debug)]
pub struct Candidate {
pub bitstream_length: u32,
pub decorrelator: u8, pub mode: u8, pub luts: i64,
pub power: f64,
pub accuracy: f64,
pub latency: i64,
}
pub fn estimate_resources(mac_count: i64, length: u32, decorr: u8, mode: u8) -> Candidate {
let len = length as f64;
let log2_len = (len.log2()).floor() as i64;
if mode == 1 {
return Candidate {
bitstream_length: length,
decorrelator: decorr,
mode,
luts: mac_count * 120,
power: mac_count as f64 * 0.5,
accuracy: 1.0,
latency: 1,
};
}
if mode == 2 {
let sc_frac = 0.7_f64;
let det_frac = 0.3_f64;
let sc_macs = (mac_count as f64 * sc_frac) as i64;
let det_macs = (mac_count as f64 * det_frac) as i64;
let mut luts = sc_macs * 2 + log2_len * 5 + det_macs * 120;
let power = sc_macs as f64 * 0.01 * (len / 256.0) + det_macs as f64 * 0.5;
let mut accuracy = 0.95_f64;
match decorr {
2 => {
luts += (sc_macs as f64 * 15.0) as i64;
accuracy = 0.97;
}
1 => {
luts += 16;
accuracy = 0.96;
}
_ => {}
}
return Candidate {
bitstream_length: length,
decorrelator: decorr,
mode,
luts,
power,
accuracy: accuracy.min(1.0),
latency: length as i64,
};
}
let mut luts = mac_count * 2 + log2_len * 5;
let power = mac_count as f64 * 0.01 * (len / 256.0);
let accuracy = match decorr {
2 => {
luts += mac_count * 15;
1.0 - 1.0 / len
} 3 => {
luts += mac_count * 12;
1.0 - 1.2 / len
} 4 => {
luts += mac_count * 8;
1.0 - 1.5 / len
} 1 => {
luts += 16;
1.0 - 1.0 / len.sqrt()
} _ => 1.0 - 2.0 / len.sqrt(), };
Candidate {
bitstream_length: length,
decorrelator: decorr,
mode,
luts,
power,
accuracy: accuracy.clamp(0.1, 1.0),
latency: length as i64,
}
}
pub fn generate_candidates(mac_count: i64) -> Vec<Candidate> {
let lengths: &[u32] = &[64, 128, 256, 512, 1024, 2048];
let decorrs: &[u8] = &[0, 1, 2, 3, 4];
let mut result = Vec::with_capacity(1 + lengths.len() * decorrs.len() * 2);
result.push(estimate_resources(mac_count, 1, 0, 1));
for &mode in &[0u8, 2] {
for &length in lengths {
for &decorr in decorrs {
result.push(estimate_resources(mac_count, length, decorr, mode));
}
}
}
result
}
struct Xoshiro256pp([u64; 4]);
impl Xoshiro256pp {
fn new(seed: u64) -> Self {
let mut s = [0u64; 4];
let mut z = seed.wrapping_add(0x9E3779B97F4A7C15);
for slot in s.iter_mut() {
z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9);
z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB);
*slot = z ^ (z >> 31);
}
Self(s)
}
fn next_u64(&mut self) -> u64 {
let result = (self.0[0].wrapping_add(self.0[3]))
.rotate_left(23)
.wrapping_add(self.0[0]);
let t = self.0[1] << 17;
self.0[2] ^= self.0[0];
self.0[3] ^= self.0[1];
self.0[1] ^= self.0[2];
self.0[0] ^= self.0[3];
self.0[2] ^= t;
self.0[3] = self.0[3].rotate_left(45);
result
}
fn next_usize(&mut self, bound: usize) -> usize {
(self.next_u64() % bound as u64) as usize
}
fn next_f64(&mut self) -> f64 {
(self.next_u64() >> 11) as f64 * (1.0 / (1u64 << 53) as f64)
}
}
#[derive(Clone, Debug)]
pub struct SAResult {
pub best_config: Vec<usize>, pub best_score: f64,
pub pareto_luts: Vec<i64>,
pub pareto_power: Vec<f64>,
pub pareto_score: Vec<f64>,
}
pub fn simulated_annealing(
candidates: &[Vec<Candidate>],
weights: &[f64],
max_luts: i64,
max_power: f64,
max_latency: i64,
t_init: f64,
t_min: f64,
alpha: f64,
max_iter: usize,
seed: u64,
) -> Option<SAResult> {
let n = candidates.len();
let mut rng = Xoshiro256pp::new(seed);
let mut current: Vec<usize> = (0..n)
.map(|i| {
candidates[i]
.iter()
.enumerate()
.min_by_key(|(_, c)| c.luts)
.map(|(idx, _)| idx)
.unwrap_or(0)
})
.collect();
if !is_feasible(candidates, ¤t, max_luts, max_power, max_latency) {
return None;
}
let weight_sum: f64 = weights.iter().sum();
let score_fn = |cfg: &[usize]| -> f64 {
let mut total = 0.0;
for i in 0..n {
total += candidates[i][cfg[i]].accuracy * weights[i];
}
total / weight_sum
};
let mut best = current.clone();
let mut best_score = score_fn(&best);
let mut current_score = best_score;
let mut t = t_init;
let mut pareto_luts = Vec::new();
let mut pareto_power = Vec::new();
let mut pareto_score = Vec::new();
for _ in 0..max_iter {
if t <= t_min {
break;
}
let layer = rng.next_usize(n);
let cand_idx = rng.next_usize(candidates[layer].len());
let old = current[layer];
current[layer] = cand_idx;
if !is_feasible(candidates, ¤t, max_luts, max_power, max_latency) {
current[layer] = old;
t *= alpha;
continue;
}
let trial_score = score_fn(¤t);
let delta = trial_score - current_score;
if delta > 0.0 || rng.next_f64() < (delta / t).exp() {
current_score = trial_score;
if current_score > best_score {
best = current.clone();
best_score = current_score;
}
let luts: i64 = (0..n).map(|i| candidates[i][current[i]].luts).sum();
let power: f64 = (0..n).map(|i| candidates[i][current[i]].power).sum();
pareto_luts.push(luts);
pareto_power.push(power);
pareto_score.push(current_score);
} else {
current[layer] = old;
}
t *= alpha;
}
Some(SAResult {
best_config: best,
best_score,
pareto_luts,
pareto_power,
pareto_score,
})
}
fn is_feasible(
candidates: &[Vec<Candidate>],
config: &[usize],
max_luts: i64,
max_power: f64,
max_latency: i64,
) -> bool {
let n = candidates.len();
let mut total_luts: i64 = 0;
let mut total_power: f64 = 0.0;
let mut max_lat: i64 = 0;
for i in 0..n {
let c = &candidates[i][config[i]];
total_luts += c.luts;
total_power += c.power;
if c.latency > max_lat {
max_lat = c.latency;
}
}
total_luts <= max_luts
&& total_power <= max_power
&& (max_latency <= 0 || max_lat <= max_latency)
}
pub fn extract_pareto(luts: &[i64], power: &[f64], score: &[f64]) -> Vec<usize> {
let n = luts.len();
if n == 0 {
return vec![];
}
let indices: Vec<usize> = (0..n)
.into_par_iter()
.filter(|&i| {
for j in 0..n {
if j == i {
continue;
}
if luts[j] <= luts[i]
&& power[j] <= power[i]
&& score[j] >= score[i]
&& (luts[j] < luts[i] || power[j] < power[i] || score[j] > score[i])
{
return false;
}
}
true
})
.collect();
indices
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_estimate_deterministic() {
let c = estimate_resources(100, 1, 0, 1);
assert_eq!(c.luts, 12000);
assert!((c.accuracy - 1.0).abs() < 1e-10);
assert_eq!(c.latency, 1);
}
#[test]
fn test_estimate_sc_sobol() {
let c = estimate_resources(10, 256, 2, 0);
assert!(c.luts > 0);
assert!(c.accuracy > 0.99);
}
#[test]
fn test_estimate_hybrid() {
let c = estimate_resources(10, 256, 2, 2);
assert!((c.accuracy - 0.97).abs() < 1e-10);
}
#[test]
fn test_generate_candidates_count() {
let cands = generate_candidates(10);
assert_eq!(cands.len(), 61);
}
#[test]
fn test_sa_finds_solution() {
let cands: Vec<Vec<Candidate>> = (0..3).map(|_| generate_candidates(10)).collect();
let weights = vec![1.0, 1.0, 2.0];
let result = simulated_annealing(
&cands, &weights, 100_000, 1000.0, 0, 1.0, 0.001, 0.95, 500, 42,
);
assert!(result.is_some());
let r = result.unwrap();
assert!(r.best_score > 0.5);
}
#[test]
fn test_sa_infeasible() {
let cands: Vec<Vec<Candidate>> = (0..3).map(|_| generate_candidates(10)).collect();
let weights = vec![1.0, 1.0, 1.0];
let result = simulated_annealing(&cands, &weights, 1, 0.001, 0, 1.0, 0.001, 0.95, 100, 42);
assert!(result.is_none());
}
#[test]
fn test_pareto_single_point() {
let indices = extract_pareto(&[100], &[1.0], &[0.9]);
assert_eq!(indices, vec![0]);
}
#[test]
fn test_pareto_dominated() {
let indices = extract_pareto(&[200, 100], &[2.0, 1.0], &[0.8, 0.9]);
assert_eq!(indices, vec![1]);
}
#[test]
fn test_pareto_both_nondominated() {
let indices = extract_pareto(&[100, 200], &[1.0, 2.0], &[0.8, 0.95]);
assert_eq!(indices.len(), 2);
}
#[test]
fn test_xoshiro_reproducible() {
let mut rng1 = Xoshiro256pp::new(42);
let mut rng2 = Xoshiro256pp::new(42);
for _ in 0..100 {
assert_eq!(rng1.next_u64(), rng2.next_u64());
}
}
}