//! This crate provides a hyperparameter optimization algorithm using TPE (Tree-structured Parzen Estimator).
//!
//! # Examples
//!
//! An example optimizing a simple quadratic function that has one numerical and one categorical parameter.
//! ```
//! use rand::SeedableRng as _;
//!
//! # fn main() -> anyhow::Result<()> {
//! let choices = [1, 10, 100];
//! let mut optim0 =
//!     tpe::TpeOptimizer::new(tpe::parzen_estimator(), tpe::range(-5.0, 5.0)?);
//! let mut optim1 =
//!     tpe::TpeOptimizer::new(tpe::histogram_estimator(), tpe::categorical_range(choices.len())?);
//!
//! fn objective(x: f64, y: i32) -> f64 {
//!     x.powi(2) + y as f64
//! }
//!
//! let mut best_value = std::f64::INFINITY;
//! let mut rng = rand::rngs::StdRng::from_seed(Default::default());
//! for _ in 0..100 {
//!    let x = optim0.ask(&mut rng)?;
//!    let y = optim1.ask(&mut rng)?;
//!
//!    let v = objective(x, choices[y as usize]);
//!    optim0.tell(x, v)?;
//!    optim1.tell(y, v)?;
//!    best_value = best_value.min(v);
//! }
//!
//! assert_eq!(best_value, 1.000054276671888);
//! # Ok(())
//! # }
//! ```
//!
//! # References
//!
//! Please refer to the following papers for the details of TPE:
//!
//! - [Algorithms for Hyper-Parameter Optimization](https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf)
//! - [Making a Science of Model Search: Hyperparameter Optimization in Hundreds of Dimensions for Vision Architectures](http://proceedings.mlr.press/v28/bergstra13.pdf)
#![warn(missing_docs)]
use crate::density_estimation::{BuildDensityEstimator, DefaultEstimatorBuilder, DensityEstimator};
#[cfg(doc)]
use crate::density_estimation::{HistogramEstimator, ParzenEstimator};
use crate::range::{Range, RangeError};
use ordered_float::OrderedFloat;
use rand::distributions::Distribution;
use rand::Rng;
use std::num::NonZeroUsize;

pub mod density_estimation;
pub mod range;

/// Creates a [`Range`] instance.
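///
/// # Examples
///
/// A minimal construction sketch:
///
/// ```
/// # fn main() -> anyhow::Result<()> {
/// // A numerical parameter searched within `-5.0..5.0`.
/// let _range = tpe::range(-5.0, 5.0)?;
/// # Ok(())
/// # }
/// ```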
pub fn range(start: f64, end: f64) -> Result<Range, RangeError> {
    Range::new(start, end)
}

/// Creates a [`Range`] for a categorical parameter.
///
/// This is equivalent to `range(0.0, cardinality as f64)`.
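///
/// # Examples
///
/// A value sampled from this range is an index into the choices; a minimal
/// sketch (the choice list below is a made-up example):
///
/// ```
/// # fn main() -> anyhow::Result<()> {
/// let choices = ["sgd", "adam", "momentum"];
/// let mut optim = tpe::TpeOptimizer::new(
///     tpe::histogram_estimator(),
///     tpe::categorical_range(choices.len())?,
/// );
///
/// let mut rng = rand::thread_rng();
/// let index = optim.ask(&mut rng)?;
/// assert!(choices.get(index as usize).is_some());
/// # Ok(())
/// # }
/// ```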
pub fn categorical_range(cardinality: usize) -> Result<Range, RangeError> {
    Range::new(0.0, cardinality as f64)
}

/// Creates a [`DefaultEstimatorBuilder`] to build [`ParzenEstimator`] (for numerical parameters).
pub fn parzen_estimator() -> DefaultEstimatorBuilder {
    DefaultEstimatorBuilder::Parzen(Default::default())
}

/// Creates a [`DefaultEstimatorBuilder`] to build [`HistogramEstimator`] (for categorical parameters).
pub fn histogram_estimator() -> DefaultEstimatorBuilder {
    DefaultEstimatorBuilder::Histogram(Default::default())
}

/// Builder of [`TpeOptimizer`].
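///
/// # Examples
///
/// A minimal usage sketch (the `gamma` and `candidates` values below are
/// arbitrary illustrations):
///
/// ```
/// # fn main() -> anyhow::Result<()> {
/// let mut builder = tpe::TpeOptimizerBuilder::new();
/// builder.gamma(0.2).candidates(32);
/// let _optim = builder.build(tpe::parzen_estimator(), tpe::range(-5.0, 5.0)?)?;
/// # Ok(())
/// # }
/// ```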
#[derive(Debug)]
pub struct TpeOptimizerBuilder {
    gamma: f64,
    candidates: usize,
}

impl TpeOptimizerBuilder {
    /// Makes a new [`TpeOptimizerBuilder`] instance with the default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the ratio at which the observations are split into the good and bad groups.
    ///
    /// The default value is `0.1`.
    pub fn gamma(&mut self, gamma: f64) -> &mut Self {
        self.gamma = gamma;
        self
    }

    /// Sets the number of candidates to be sampled when deciding the next parameter value.
    ///
    /// The default value is `24`.
    pub fn candidates(&mut self, candidates: usize) -> &mut Self {
        self.candidates = candidates;
        self
    }

    /// Builds a [`TpeOptimizer`] with the given settings.
    pub fn build<T>(
        &self,
        estimator_builder: T,
        param_range: Range,
    ) -> Result<TpeOptimizer<T>, BuildError>
    where
        T: BuildDensityEstimator,
    {
        if !(0.0 <= self.gamma && self.gamma <= 1.0) {
            return Err(BuildError::GammaOutOfRange);
        }

        Ok(TpeOptimizer {
            param_range,
            estimator_builder,
            trials: Vec::new(),
            is_sorted: false,
            gamma: self.gamma,
            candidates: NonZeroUsize::new(self.candidates).ok_or(BuildError::ZeroCandidates)?,
        })
    }
}

impl Default for TpeOptimizerBuilder {
    fn default() -> Self {
        Self {
            gamma: 0.1,
            candidates: 24,
        }
    }
}

/// Optimizer using TPE.
///
/// This optimizer tries to find the parameter value that minimizes the evaluation result.
///
/// Note that a [`TpeOptimizer`] instance can handle only one hyperparameter.
/// If you want to optimize multiple hyperparameters simultaneously,
/// please create one optimizer per hyperparameter.
#[derive(Debug)]
pub struct TpeOptimizer<T = DefaultEstimatorBuilder> {
    param_range: Range,
    estimator_builder: T,
    trials: Vec<Trial>,
    is_sorted: bool,
    gamma: f64,
    candidates: NonZeroUsize,
}

impl<T: BuildDensityEstimator> TpeOptimizer<T> {
    /// Makes a new [`TpeOptimizer`] with the default settings.
    ///
    /// If you want to customize the settings, please use [`TpeOptimizerBuilder`] instead.
    pub fn new(estimator_builder: T, param_range: Range) -> Self {
        TpeOptimizerBuilder::new()
            .build(estimator_builder, param_range)
            .expect("unreachable")
    }

    /// Returns the next value of the optimization target parameter to be evaluated.
    ///
    /// Note that, before the first ask, it might be worth giving the evaluation
    /// results of some randomly sampled observations to [`TpeOptimizer`] (via the [`tell`](TpeOptimizer::tell) method)
    /// to reduce the bias caused by having too few samples.
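    ///
    /// A minimal warm-up sketch (the quadratic objective and the number of
    /// random samples below are arbitrary illustrations):
    ///
    /// ```
    /// # fn main() -> anyhow::Result<()> {
    /// use rand::Rng as _;
    ///
    /// let mut rng = rand::thread_rng();
    /// let mut optim = tpe::TpeOptimizer::new(tpe::parzen_estimator(), tpe::range(-5.0, 5.0)?);
    ///
    /// // Seed the optimizer with a few random observations before the first ask.
    /// for _ in 0..10 {
    ///     let x = rng.gen::<f64>() * 10.0 - 5.0; // Uniform sample in `-5.0..5.0`.
    ///     optim.tell(x, x.powi(2))?;
    /// }
    ///
    /// let _next = optim.ask(&mut rng)?;
    /// # Ok(())
    /// # }
    /// ```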
    pub fn ask<R: Rng + ?Sized>(&mut self, rng: &mut R) -> Result<f64, T::Error> {
        if !self.is_sorted {
            self.trials.sort_by_key(|t| OrderedFloat(t.value));
            self.is_sorted = true;
        }

        // Split the sorted trials into the superior group (the best `gamma`
        // fraction) and the inferior group (the rest).
        let split_point = self.decide_split_point();
        let (superiors, inferiors) = self.trials.split_at(split_point);

        let superior_estimator = self.estimator_builder.build_density_estimator(
            superiors.iter().map(|t| t.param).filter(|p| p.is_finite()),
            self.param_range,
        )?;
        let inferior_estimator = self.estimator_builder.build_density_estimator(
            inferiors.iter().map(|t| t.param).filter(|p| p.is_finite()),
            self.param_range,
        )?;

        // Sample `candidates` points from the superior density and keep the one
        // maximizing the log-likelihood ratio, which TPE uses as a proxy for
        // the expected improvement.
        let param = (&superior_estimator)
            .sample_iter(rng)
            .take(self.candidates.get())
            .map(|candidate| {
                let superior_log_likelihood = superior_estimator.log_pdf(candidate);
                let inferior_log_likelihood = inferior_estimator.log_pdf(candidate);
                let ei = superior_log_likelihood - inferior_log_likelihood;
                (ei, candidate)
            })
            .max_by_key(|(ei, _)| OrderedFloat(*ei))
            .map(|(_, param)| param)
            .expect("unreachable");
        Ok(param)
    }

    /// Tells the evaluation result of a hyperparameter value to the optimizer.
    ///
    /// Note that `param` should be NaN if the hyperparameter was not used in the evaluation
    /// (this can happen when the search space is conditional).
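    ///
    /// # Examples
    ///
    /// A minimal sketch of these rules:
    ///
    /// ```
    /// # fn main() -> anyhow::Result<()> {
    /// let mut optim = tpe::TpeOptimizer::new(tpe::parzen_estimator(), tpe::range(-5.0, 5.0)?);
    ///
    /// optim.tell(1.0, 2.0)?;                            // An ordinary observation.
    /// optim.tell(std::f64::NAN, 2.0)?;                  // The parameter was not used in this trial.
    /// assert!(optim.tell(1.0, std::f64::NAN).is_err()); // A NaN objective value is rejected.
    /// # Ok(())
    /// # }
    /// ```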
    pub fn tell(&mut self, param: f64, value: f64) -> Result<(), TellError> {
        if value.is_nan() {
            return Err(TellError::NanValue);
        }

        if !(param.is_nan() || self.param_range.contains(param)) {
            return Err(TellError::ParamOutOfRange {
                param,
                range: self.param_range,
            });
        }

        self.trials.push(Trial { param, value });
        self.is_sorted = false;

        Ok(())
    }

    /// Returns all told parameter and objective values.
    ///
    /// Note that the order of the items in the returned iterator doesn't reflect the order in which [`TpeOptimizer::tell`] was called.
    ///
    /// # Examples
    ///
    /// You can use this method to store and load the state of a [`TpeOptimizer`].
    ///
    /// ```
    /// # fn main() -> anyhow::Result<()> {
    /// let temp_file = tempfile::NamedTempFile::new()?;
    /// let mut rng = rand::thread_rng();
    ///
    /// fn objective(x: f64) -> f64 {
    ///     x.powi(2)
    /// }
    ///
    /// {
    ///     let mut optim =
    ///         tpe::TpeOptimizer::new(tpe::parzen_estimator(), tpe::range(-5.0, 5.0)?);
    ///
    ///     // Runs 100 trials.
    ///     for _ in 0..100 {
    ///         let x = optim.ask(&mut rng)?;
    ///         let v = objective(x);
    ///         optim.tell(x, v)?;
    ///     }
    ///
    ///     // Stores the trials.
    ///     let mut file = std::fs::OpenOptions::new()
    ///         .write(true)
    ///         .open(temp_file.path())?;
    ///     serde_json::to_writer(&mut file, &optim.trials().collect::<Vec<_>>())?;
    /// }
    ///
    /// {
    ///     let mut optim =
    ///         tpe::TpeOptimizer::new(tpe::parzen_estimator(), tpe::range(-5.0, 5.0)?);
    ///
    ///     // Loads the previous trials.
    ///     let mut file = std::fs::File::open(temp_file.path())?;
    ///     let trials: Vec<(f64, f64)> = serde_json::from_reader(&mut file)?;
    ///     for (x, v) in trials {
    ///         optim.tell(x, v)?;
    ///     }
    ///
    ///     // Runs additional 100 trials.
    ///     for _ in 0..100 {
    ///         let x = optim.ask(&mut rng)?;
    ///         let v = objective(x);
    ///         optim.tell(x, v)?;
    ///     }
    ///     assert_eq!(optim.trials().count(), 200);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn trials(&self) -> impl '_ + Iterator<Item = (f64, f64)> {
        self.trials.iter().map(|t| (t.param, t.value))
    }

    fn decide_split_point(&self) -> usize {
        (self.trials.len() as f64 * self.gamma).ceil() as usize
    }
}

#[derive(Debug, Clone)]
struct Trial {
    param: f64,
    value: f64,
}

/// Possible errors during [`TpeOptimizerBuilder::build`].
#[derive(Debug, Clone, thiserror::Error)]
pub enum BuildError {
    #[error("the value of `gamma` must be in the range from 0.0 to 1.0")]
    /// The value of `gamma` must be in the range from `0.0` to `1.0`.
    GammaOutOfRange,

    #[error("the value of `candidates` must be a positive integer")]
    /// The value of `candidates` must be a positive integer.
    ZeroCandidates,
}

/// Possible errors during [`TpeOptimizer::tell`].
#[derive(Debug, Clone, thiserror::Error)]
pub enum TellError {
    #[error("the parameter value {param} is out of the range {range}")]
    /// The parameter value is out of the range.
    ParamOutOfRange {
        /// Actual parameter value.
        param: f64,
        /// Expected parameter range.
        range: Range,
    },

    #[error("NaN value is not allowed")]
    /// NaN value is not allowed.
    NanValue,
}

#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;

    #[test]
    fn optimizer_works() -> anyhow::Result<()> {
        let choices = [1, 10, 100];
        let mut optim0 = TpeOptimizer::new(parzen_estimator(), range(-5.0, 5.0)?);
        let mut optim1 =
            TpeOptimizer::new(histogram_estimator(), categorical_range(choices.len())?);

        fn objective(x: f64, y: i32) -> f64 {
            x.powi(2) + y as f64
        }

        let mut best_value = std::f64::INFINITY;
        let mut rng = rand::rngs::StdRng::from_seed(Default::default());
        for _ in 0..100 {
            let x = optim0.ask(&mut rng)?;
            let y = optim1.ask(&mut rng)?;

            let v = objective(x, choices[y as usize]);
            optim0.tell(x, v)?;
            optim1.tell(y, v)?;
            best_value = best_value.min(v);
        }
        assert_eq!(best_value, 1.000054276671888);

        Ok(())
    }

    #[test]
    fn store_and_load_trials_works() -> anyhow::Result<()> {
        let temp_file = tempfile::NamedTempFile::new()?;
        let mut rng = rand::thread_rng();

        fn objective(x: f64) -> f64 {
            x.powi(2)
        }

        {
            let mut optim = TpeOptimizer::new(parzen_estimator(), range(-5.0, 5.0)?);

            // Runs 100 trials.
            for _ in 0..100 {
                let x = optim.ask(&mut rng)?;
                let v = objective(x);
                optim.tell(x, v)?;
            }

            // Stores the trials.
            let mut file = std::fs::OpenOptions::new()
                .write(true)
                .open(temp_file.path())?;
            serde_json::to_writer(&mut file, &optim.trials().collect::<Vec<_>>())?;
        }

        {
            let mut optim = TpeOptimizer::new(parzen_estimator(), range(-5.0, 5.0)?);

            // Loads the previous trials.
            let mut file = std::fs::File::open(temp_file.path())?;
            let trials: Vec<(f64, f64)> = serde_json::from_reader(&mut file)?;
            for (x, v) in trials {
                optim.tell(x, v)?;
            }

            // Runs additional 100 trials.
            for _ in 0..100 {
                let x = optim.ask(&mut rng)?;
                let v = objective(x);
                optim.tell(x, v)?;
            }
            assert_eq!(optim.trials().count(), 200);
        }

        Ok(())
    }
}