Skip to main content

scirs2_optimize/bayesian/
transfer_bo.rs

1//! Transfer Bayesian Optimization.
2//!
3//! Enables knowledge transfer from previously optimized (source) tasks to a
4//! new (target) task, dramatically reducing the number of expensive evaluations
5//! required to find good solutions.
6//!
7//! # Approach
8//!
9//! 1. **Task similarity**: Each source task is assigned a weight proportional
10//!    to `exp(-distance / temperature)` where `distance` is derived from the
11//!    spatial overlap between source observations and the target search space.
12//!
13//! 2. **Weighted surrogate**: We maintain one GP per source task and one GP
14//!    for the target task.  The acquisition function is the weighted combination
15//!
16//!    ```text
17//!      acq(x) = w_t * EI_target(x) + (1 - w_t) * sum_s w_s * EI_source_s(x)
18//!    ```
19//!
20//!    where `w_t` starts at 0 (pure transfer) and rises linearly toward 1 as
21//!    more target evaluations accumulate (adaptive weighting).
22//!
23//! 3. **Warm-start**: Source task observations that fall inside the target
24//!    domain are injected into the target GP as low-weight pseudo-observations,
25//!    giving the model a head-start.
26//!
27//! # Quick Start
28//!
29//! ```rust
30//! use scirs2_optimize::bayesian::transfer_bo::{
31//!     TransferBo, TransferBoConfig, TaskObservations,
32//! };
33//! use scirs2_core::ndarray::{array, Array2, Array1};
34//!
35//! // Pretend we have a source task: f_src(x) = x^2, sampled at a few points.
36//! let x_src = Array2::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("valid input");
37//! let y_src = Array1::from_vec(vec![0.0, 1.0, 4.0]);
38//! let src_task = TaskObservations {
39//!     x: x_src,
40//!     y: y_src,
41//!     bounds: vec![(0.0_f64, 3.0_f64)],
42//! };
43//!
44//! let target_bounds = vec![(0.0_f64, 3.0_f64)];
45//! let config = TransferBoConfig { seed: Some(7), n_initial: 3, ..Default::default() };
46//!
47//! let mut tbo = TransferBo::new(vec![src_task], target_bounds, config)
48//!     .expect("build tbo");
49//!
50//! let result = tbo.optimize(|x: &[f64]| (x[0] - 0.5_f64).powi(2), 15)
51//!     .expect("optimize");
52//!
53//! println!("Best x: {:?}  f: {:.4}", result.x_best, result.f_best);
54//! ```
55
56use scirs2_core::ndarray::{Array1, Array2};
57use scirs2_core::random::rngs::StdRng;
58use scirs2_core::random::{Rng, RngExt, SeedableRng};
59
60use crate::error::{OptimizeError, OptimizeResult};
61
62use super::gp::{GpSurrogate, GpSurrogateConfig, RbfKernel};
63use super::sampling::{generate_samples, SamplingConfig, SamplingStrategy};
64
65// ---------------------------------------------------------------------------
66// Task observations
67// ---------------------------------------------------------------------------
68
69/// Observations from a completed optimization task.
70#[derive(Debug, Clone)]
71pub struct TaskObservations {
72    /// Input matrix (n_obs × n_dims).
73    pub x: Array2<f64>,
74    /// Output vector (n_obs,).
75    pub y: Array1<f64>,
76    /// Search-space bounds used for the task [(lo, hi), ...].
77    pub bounds: Vec<(f64, f64)>,
78}
79
80// ---------------------------------------------------------------------------
81// Task similarity
82// ---------------------------------------------------------------------------
83
84/// Compute the similarity between a source task and the target search space.
85///
86/// The metric is based on the fraction of source observations that fall within
87/// the target bounds, weighted by how centrally they sit.
88///
89/// Returns a value in `[0, 1]` where 1 means perfect overlap.
90pub fn compute_task_similarity(source_obs: &TaskObservations, target_bounds: &[(f64, f64)]) -> f64 {
91    if source_obs.x.is_empty() || target_bounds.is_empty() {
92        return 0.0;
93    }
94
95    let n_dims = target_bounds.len().min(source_obs.x.ncols());
96    let n = source_obs.x.nrows();
97    let mut total_score = 0.0;
98
99    for row in 0..n {
100        let mut point_score = 1.0;
101        for d in 0..n_dims {
102            let lo = target_bounds[d].0;
103            let hi = target_bounds[d].1;
104            let range = (hi - lo).abs().max(1e-12);
105            let v = source_obs.x[[row, d]];
106
107            if v < lo || v > hi {
108                // Outside bounds: compute exponential penalty based on distance.
109                let dist = if v < lo { lo - v } else { v - hi };
110                point_score *= (-5.0 * dist / range).exp();
111            } else {
112                // Inside bounds: use a tent function peaking at the center.
113                let center = 0.5 * (lo + hi);
114                let half_range = 0.5 * range;
115                let centrality = 1.0 - (v - center).abs() / half_range;
116                point_score *= 0.5 + 0.5 * centrality; // in [0.5, 1]
117            }
118        }
119        total_score += point_score;
120    }
121
122    (total_score / n as f64).clamp(0.0, 1.0)
123}
124
125// ---------------------------------------------------------------------------
126// Transfer BO configuration & result
127// ---------------------------------------------------------------------------
128
129/// Configuration for Transfer Bayesian Optimization.
130#[derive(Debug, Clone)]
131pub struct TransferBoConfig {
132    /// Number of initial random evaluations on the target task.
133    pub n_initial: usize,
134    /// Exploration bonus xi for EI.
135    pub xi: f64,
136    /// Number of random candidates evaluated when maximising the acquisition.
137    pub n_candidates: usize,
138    /// Temperature controlling how sharply task similarity translates to weight.
139    pub similarity_temperature: f64,
140    /// Random seed.
141    pub seed: Option<u64>,
142    /// Verbosity.
143    pub verbose: usize,
144}
145
146impl Default for TransferBoConfig {
147    fn default() -> Self {
148        Self {
149            n_initial: 5,
150            xi: 0.01,
151            n_candidates: 200,
152            similarity_temperature: 0.5,
153            seed: None,
154            verbose: 0,
155        }
156    }
157}
158
159/// Result of transfer Bayesian optimization.
160#[derive(Debug, Clone)]
161pub struct TransferBoResult {
162    /// Best input point found.
163    pub x_best: Array1<f64>,
164    /// Best objective value.
165    pub f_best: f64,
166    /// Source task similarity weights used.
167    pub source_weights: Vec<f64>,
168    /// Number of target evaluations performed.
169    pub n_target_evals: usize,
170    /// Trajectory: (iteration, f_value).
171    pub history: Vec<(usize, f64)>,
172}
173
174// ---------------------------------------------------------------------------
175// Internal source-task surrogate
176// ---------------------------------------------------------------------------
177
178struct SourceModel {
179    gp: GpSurrogate,
180    similarity: f64,
181    weight: f64,
182}
183
184// ---------------------------------------------------------------------------
185// Transfer BO
186// ---------------------------------------------------------------------------
187
188/// Transfer Bayesian Optimizer.
189///
190/// Uses source-task GP surrogates to accelerate optimization on the target task.
191pub struct TransferBo {
192    source_tasks: Vec<TaskObservations>,
193    target_bounds: Vec<(f64, f64)>,
194    config: TransferBoConfig,
195}
196
197impl TransferBo {
198    /// Create a new transfer BO instance.
199    pub fn new(
200        source_tasks: Vec<TaskObservations>,
201        target_bounds: Vec<(f64, f64)>,
202        config: TransferBoConfig,
203    ) -> OptimizeResult<Self> {
204        if target_bounds.is_empty() {
205            return Err(OptimizeError::InvalidInput(
206                "Target bounds must not be empty".to_string(),
207            ));
208        }
209        Ok(Self {
210            source_tasks,
211            target_bounds,
212            config,
213        })
214    }
215
216    /// Optimize the target objective using transfer from source tasks.
217    ///
218    /// `n_iterations` is the total number of target evaluations (excluding
219    /// any pseudo-observations from source tasks).
220    pub fn optimize<F>(
221        &mut self,
222        objective: F,
223        n_iterations: usize,
224    ) -> OptimizeResult<TransferBoResult>
225    where
226        F: Fn(&[f64]) -> f64,
227    {
228        if n_iterations == 0 {
229            return Err(OptimizeError::InvalidInput(
230                "n_iterations must be positive".to_string(),
231            ));
232        }
233
234        let n_dims = self.target_bounds.len();
235        let seed = self.config.seed.unwrap_or(42);
236        let mut rng = StdRng::seed_from_u64(seed);
237
238        // -----------------------------------------------------------------
239        // 1. Build & fit source surrogates.
240        // -----------------------------------------------------------------
241        let mut source_models: Vec<SourceModel> = self
242            .source_tasks
243            .iter()
244            .map(|task| {
245                let gp_cfg = GpSurrogateConfig {
246                    noise_variance: 1e-4,
247                    optimize_hyperparams: true,
248                    n_restarts: 2,
249                    max_opt_iters: 30,
250                };
251                let mut gp = GpSurrogate::new(Box::new(RbfKernel::default()), gp_cfg);
252                if !task.x.is_empty() {
253                    let _ = gp.fit(&task.x, &task.y);
254                }
255                let similarity = compute_task_similarity(task, &self.target_bounds);
256                SourceModel {
257                    gp,
258                    similarity,
259                    weight: similarity,
260                }
261            })
262            .collect();
263
264        // Normalise source weights (softmax-style via temperature).
265        let temp = self.config.similarity_temperature.max(1e-6);
266        let raw_weights: Vec<f64> = source_models
267            .iter()
268            .map(|m| (m.similarity / temp).exp())
269            .collect();
270        let w_sum: f64 = raw_weights.iter().sum::<f64>() + 1e-12;
271        for (i, m) in source_models.iter_mut().enumerate() {
272            m.weight = raw_weights[i] / w_sum;
273        }
274
275        let source_weights: Vec<f64> = source_models.iter().map(|m| m.weight).collect();
276
277        // -----------------------------------------------------------------
278        // 2. Warm-start the target GP with in-domain source observations.
279        // -----------------------------------------------------------------
280        let mut target_x_buf: Vec<Vec<f64>> = Vec::new();
281        let mut target_y_buf: Vec<f64> = Vec::new();
282
283        for (task, model) in self.source_tasks.iter().zip(source_models.iter()) {
284            if model.similarity < 0.3 {
285                continue; // Skip dissimilar tasks.
286            }
287            let ndims_task = task.x.ncols().min(n_dims);
288            for row in 0..task.x.nrows() {
289                let mut in_domain = true;
290                for d in 0..ndims_task {
291                    let v = task.x[[row, d]];
292                    if v < self.target_bounds[d].0 || v > self.target_bounds[d].1 {
293                        in_domain = false;
294                        break;
295                    }
296                }
297                if in_domain {
298                    let mut x_row = vec![0.0f64; n_dims];
299                    for d in 0..ndims_task {
300                        x_row[d] = task.x[[row, d]];
301                    }
302                    target_x_buf.push(x_row);
303                    target_y_buf.push(task.y[row]);
304                }
305            }
306        }
307
308        // -----------------------------------------------------------------
309        // 3. Target GP.
310        // -----------------------------------------------------------------
311        let gp_cfg_target = GpSurrogateConfig {
312            noise_variance: 1e-6,
313            optimize_hyperparams: true,
314            n_restarts: 3,
315            max_opt_iters: 50,
316        };
317        let mut target_gp = GpSurrogate::new(Box::new(RbfKernel::default()), gp_cfg_target);
318
319        // -----------------------------------------------------------------
320        // 4. Initial random evaluations on the target.
321        // -----------------------------------------------------------------
322        let n_init = self.config.n_initial.min(n_iterations).max(2);
323        let lhs_cfg = SamplingConfig {
324            seed: Some(seed),
325            ..SamplingConfig::default()
326        };
327        let x_init = generate_samples(
328            n_init,
329            &self.target_bounds,
330            SamplingStrategy::LatinHypercube,
331            Some(lhs_cfg),
332        )?;
333
334        let mut history: Vec<(usize, f64)> = Vec::new();
335        let mut best_y = f64::INFINITY;
336        let mut best_x: Option<Array1<f64>> = None;
337        let mut n_target_evals = 0usize;
338
339        for i in 0..x_init.nrows() {
340            let xi = x_init.row(i).to_owned();
341            let x_slice: Vec<f64> = xi.iter().copied().collect();
342            let y = objective(&x_slice);
343            n_target_evals += 1;
344
345            target_x_buf.push(x_slice);
346            target_y_buf.push(y);
347
348            if y < best_y {
349                best_y = y;
350                best_x = Some(xi);
351            }
352            history.push((n_target_evals, y));
353        }
354
355        // Fit target GP with initial data.
356        self.fit_target_gp(&mut target_gp, &target_x_buf, &target_y_buf, n_dims)?;
357
358        // -----------------------------------------------------------------
359        // 5. Main BO loop.
360        // -----------------------------------------------------------------
361        for iter in n_init..n_iterations {
362            let n_cands = self.config.n_candidates;
363
364            // Adaptive target weight: rises linearly from 0 to 1 as more
365            // target observations accumulate.
366            let target_weight = (iter as f64 / n_iterations as f64).min(1.0);
367
368            // Sample candidates.
369            let mut candidates = Array2::zeros((n_cands, n_dims));
370            for r in 0..n_cands {
371                for c in 0..n_dims {
372                    let lo = self.target_bounds[c].0;
373                    let hi = self.target_bounds[c].1;
374                    candidates[[r, c]] = lo + rng.random::<f64>() * (hi - lo);
375                }
376            }
377
378            let current_best = if best_y.is_finite() { best_y } else { 0.0 };
379
380            // Evaluate weighted acquisition over all candidates.
381            let mut best_acq = f64::NEG_INFINITY;
382            let mut best_row = 0;
383
384            for r in 0..n_cands {
385                let x_row = candidates.row(r).to_owned();
386                let x_mat = match x_row.into_shape_with_order((1, n_dims)) {
387                    Ok(m) => m,
388                    Err(_) => continue,
389                };
390
391                // Target EI.
392                let target_ei = if target_gp.n_train() >= 2 {
393                    ei_single(&x_mat, &target_gp, current_best, self.config.xi).unwrap_or(0.0)
394                } else {
395                    0.0
396                };
397
398                // Source EI (weighted sum).
399                let mut source_ei = 0.0;
400                for sm in &source_models {
401                    if sm.gp.n_train() == 0 {
402                        continue;
403                    }
404                    let s_ei =
405                        ei_single(&x_mat, &sm.gp, current_best, self.config.xi).unwrap_or(0.0);
406                    source_ei += sm.weight * s_ei;
407                }
408
409                let acq = target_weight * target_ei + (1.0 - target_weight) * source_ei;
410
411                if acq > best_acq {
412                    best_acq = acq;
413                    best_row = r;
414                }
415            }
416
417            // Evaluate the chosen candidate.
418            let x_next = candidates.row(best_row).to_owned();
419            let x_slice: Vec<f64> = x_next.iter().copied().collect();
420            let y_next = objective(&x_slice);
421            n_target_evals += 1;
422
423            target_x_buf.push(x_slice);
424            target_y_buf.push(y_next);
425
426            if y_next < best_y {
427                best_y = y_next;
428                best_x = Some(x_next);
429            }
430
431            history.push((n_target_evals, y_next));
432
433            if self.config.verbose >= 2 {
434                println!(
435                    "[TBO] iter={} w_t={:.2} f={:.6} best={:.6}",
436                    iter, target_weight, y_next, best_y
437                );
438            }
439
440            // Refit target GP.
441            self.fit_target_gp(&mut target_gp, &target_x_buf, &target_y_buf, n_dims)?;
442        }
443
444        if self.config.verbose >= 1 {
445            println!(
446                "[TBO] Done. n_evals={} best_f={:.6}",
447                n_target_evals, best_y
448            );
449        }
450
451        let x_best = best_x.unwrap_or_else(|| Array1::zeros(n_dims));
452
453        Ok(TransferBoResult {
454            x_best,
455            f_best: best_y,
456            source_weights,
457            n_target_evals,
458            history,
459        })
460    }
461
462    // -----------------------------------------------------------------------
463    // Internal helpers
464    // -----------------------------------------------------------------------
465
466    fn fit_target_gp(
467        &self,
468        gp: &mut GpSurrogate,
469        x_buf: &[Vec<f64>],
470        y_buf: &[f64],
471        n_dims: usize,
472    ) -> OptimizeResult<()> {
473        if x_buf.is_empty() {
474            return Ok(());
475        }
476        let n = x_buf.len().min(y_buf.len());
477        if n == 0 {
478            return Ok(());
479        }
480
481        let mut x_mat = Array2::zeros((n, n_dims));
482        let mut y_vec = Array1::zeros(n);
483
484        for (i, (xv, &yv)) in x_buf.iter().zip(y_buf.iter()).enumerate().take(n) {
485            for d in 0..n_dims.min(xv.len()) {
486                x_mat[[i, d]] = xv[d];
487            }
488            y_vec[i] = yv;
489        }
490
491        gp.fit(&x_mat, &y_vec)
492    }
493}
494
495// ---------------------------------------------------------------------------
496// EI helper (uses gp.predict internally)
497// ---------------------------------------------------------------------------
498
499fn erf_approx(x: f64) -> f64 {
500    let p = 0.3275911_f64;
501    let (a1, a2, a3, a4, a5) = (
502        0.254829592_f64,
503        -0.284496736_f64,
504        1.421413741_f64,
505        -1.453152027_f64,
506        1.061405429_f64,
507    );
508    let sign = if x < 0.0 { -1.0 } else { 1.0 };
509    let xa = x.abs();
510    let t = 1.0 / (1.0 + p * xa);
511    let poly = (((a5 * t + a4) * t + a3) * t + a2) * t + a1;
512    sign * (1.0 - poly * t * (-xa * xa).exp())
513}
514
515fn norm_cdf(z: f64) -> f64 {
516    0.5 * (1.0 + erf_approx(z / std::f64::consts::SQRT_2))
517}
518
519fn norm_pdf(z: f64) -> f64 {
520    (-0.5 * z * z).exp() / (2.0 * std::f64::consts::PI).sqrt()
521}
522
523fn ei_single(x_mat: &Array2<f64>, gp: &GpSurrogate, best_y: f64, xi: f64) -> OptimizeResult<f64> {
524    let (mean, var) = gp.predict(x_mat)?;
525    let mu = mean[0];
526    let sigma = var[0].max(0.0).sqrt();
527    if sigma < 1e-12 {
528        return Ok(0.0);
529    }
530    let z = (best_y - mu - xi) / sigma;
531    let ei = (best_y - mu - xi) * norm_cdf(z) + sigma * norm_pdf(z);
532    Ok(ei.max(0.0))
533}
534
535// ---------------------------------------------------------------------------
536// Tests
537// ---------------------------------------------------------------------------
538
539#[cfg(test)]
540mod tests {
541    use super::*;
542    use scirs2_core::ndarray::{array, Array1, Array2};
543
544    fn simple_source_task() -> TaskObservations {
545        // f(x) = x^2 on [0, 3].
546        let x = Array2::from_shape_vec((5, 1), vec![0.0, 0.75, 1.5, 2.25, 3.0]).expect("shape");
547        let y = Array1::from_vec(vec![0.0, 0.5625, 2.25, 5.0625, 9.0]);
548        TaskObservations {
549            x,
550            y,
551            bounds: vec![(0.0_f64, 3.0_f64)],
552        }
553    }
554
555    #[test]
556    fn test_compute_task_similarity_same_bounds() {
557        let task = simple_source_task();
558        let target_bounds = vec![(0.0_f64, 3.0_f64)];
559        let sim = compute_task_similarity(&task, &target_bounds);
560        assert!(sim > 0.5 && sim <= 1.0, "similarity={}", sim);
561    }
562
563    #[test]
564    fn test_compute_task_similarity_disjoint() {
565        let task = simple_source_task();
566        // Target domain is completely to the right of source data.
567        let target_bounds = vec![(10.0_f64, 20.0_f64)];
568        let sim = compute_task_similarity(&task, &target_bounds);
569        // Should be very low (close to 0).
570        assert!(sim < 0.1, "Expected low similarity, got {}", sim);
571    }
572
573    #[test]
574    fn test_transfer_bo_optimizes() {
575        let src = simple_source_task();
576        let target_bounds = vec![(0.0_f64, 3.0_f64)];
577        let config = TransferBoConfig {
578            n_initial: 3,
579            n_candidates: 50,
580            seed: Some(99),
581            verbose: 0,
582            ..Default::default()
583        };
584        let mut tbo = TransferBo::new(vec![src], target_bounds, config).expect("build tbo");
585
586        // Target: f(x) = (x - 0.5)^2
587        let result = tbo
588            .optimize(|x: &[f64]| (x[0] - 0.5_f64).powi(2), 12)
589            .expect("optimize");
590
591        assert!(result.f_best.is_finite());
592        assert!(result.f_best < 1.5, "f_best={}", result.f_best);
593        assert_eq!(result.n_target_evals, 12);
594    }
595
596    #[test]
597    fn test_transfer_bo_no_sources() {
598        // Should still work with empty source list.
599        let target_bounds = vec![(0.0_f64, 5.0_f64)];
600        let config = TransferBoConfig {
601            n_initial: 3,
602            n_candidates: 30,
603            seed: Some(7),
604            ..Default::default()
605        };
606        let mut tbo = TransferBo::new(vec![], target_bounds, config).expect("build tbo");
607        let result = tbo
608            .optimize(|x: &[f64]| (x[0] - 2.5_f64).powi(2), 8)
609            .expect("optimize");
610        assert!(result.f_best.is_finite());
611    }
612
613    #[test]
614    fn test_transfer_bo_source_weights_sum_close_to_one() {
615        let task1 = simple_source_task();
616        let mut task2 = simple_source_task();
617        // Shift task2 to a different domain.
618        for i in 0..task2.x.nrows() {
619            task2.x[[i, 0]] += 5.0;
620        }
621        task2.bounds = vec![(5.0_f64, 8.0_f64)];
622
623        let target_bounds = vec![(0.0_f64, 3.0_f64)];
624        let config = TransferBoConfig {
625            n_initial: 2,
626            n_candidates: 20,
627            seed: Some(1),
628            ..Default::default()
629        };
630        let mut tbo = TransferBo::new(vec![task1, task2], target_bounds, config).expect("build");
631        let result = tbo.optimize(|x: &[f64]| x[0].powi(2), 5).expect("opt");
632
633        let w_sum: f64 = result.source_weights.iter().sum();
634        // Softmax-normalized weights sum to approximately 1 (they're raw weights
635        // that we normalised against w_sum).
636        assert!((w_sum - 1.0).abs() < 1e-6, "source weights sum={}", w_sum);
637    }
638}