annembed/
embedparams.rs

1//! This module defines parameters for ann embedding.
2//!
3
4#[cfg_attr(doc, katexit::katexit)]
5/// It is necessary to describe briefly the model used in the embedding:
6///
7/// ## Definition of the weight of an edge of the graph to embed
8///
9/// First we define the local scale $\rho$ around a point.  
10/// It is defined as the mean of distances of points to their nearest neighbour.
11/// The points taken into account to define $\rho$ are the node we consider and
12/// all its knbn neighbours. So we compute the mean of distances to nearest neighbours
13/// on knbn + 1 points around current point.
14///   
15/// let ($d_{i}$) be the sorted distances in increasing order of neighbours  for i=0..k of a node n,
16///     $$w_{i} = \exp\left(- \left(\frac{d_{i} - d_{0}}{S * \rho}\right)^{\beta} \right)$$
17///
18/// S is a scale factor modulating $\rho$.
19/// After that weights are normalized to a probability distribution.
20///
21/// So before normalization $w_{0}$ is always equal to 1. Augmenting β to 2. makes the weight $w_{i}$ decrease faster.
22/// *The least weight of an edge must not go under $10^{-5}$ to limit the range of weight and avoid Svd numeric difficulties*.
23/// The code stops with an error in this case.
24/// So after normalization the range of weights from $w_{0}$ to $w_{k}$ is larger.
/// Reducing S has a similar effect, but playing with both $\beta$ and the scale adjustment must not violate the range constraint on weights.
26///
27/// It must be noted that setting the scale as described before and renormalizing to get a probability distribution
28/// gives a perplexity nearly equal to the number of neighbours.  
29/// This can be verified by using the logging (implemented using the crates **env_logger** and **log**) and setting
30/// RUST_LOG=annembed=INFO in your environment.
31/// Then quantile summaries are given for the distributions of edge distances, edge weights, and perplexity
/// of nodes. This helps adjust the parameters β and the scale S and shows their impact on these quantiles.
33///
34///  
35/// Default value :  
36///
37///  $\beta = 1$ so that we have exponential weights similar to Umap.  
38///
39///  $S = 0.5$
40///
41/// But it is possible to set β to 2. to get more gaussian weight or reduce to 0.5 and adjust S to respect the constraints on edge weights.
42///    
43/// ## Definition of the weight of an edge of the embedded graph
44///
45/// The embedded edge has the usual expression :
/// $$ w(x,y) = \frac{1}{1 + \left\| (x - y)/a_{x} \right\|^{2b}} $$
47///
48/// by default b = 1.
49/// The coefficient $a_{x}$ is deduced from the scale coefficient in the original space with some
50/// restriction to avoid too large fluctuations.
51///
52///
53///
54/// - Initial step of the gradient and number of batches
55///
56/// A number of batch for the Mnist digits data around 10-20 seems sufficient.
57/// The initial gradient step $\gamma_{0}$ can be chosen around 1. (in the range 1/5 ... 5.).    
/// Reasonably it should satisfy $\text{nb\_batch} \cdot \gamma_{0} < 1$.
59///
60/// - asked_dimension : default is set to 2.
61///
62/// ## The optimization of the embedding
63///
64/// The embedding is optimized by minimizing the (Shannon at present time) cross entropy
65/// between distribution  of original and embedded weight of edges. This minimization is done
66/// by a standard (multithreaded) stochastic gradient with negative sampling for the unobserved edges
67/// (see [Mnih-Teh](https://arxiv.org/abs/1206.6426) or
68/// [Mikolov](https://proceedings.neurips.cc/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf))
69///
70/// The number of negative edge sampling is set to a fixed value 5.
71///
72/// - expression of the gradient
73///
74///
/// Here are the main parameters driving the embedding.
#[derive(Clone, Copy)]
pub struct EmbedderParams {
    /// embedding dimension : default to 2
    pub asked_dim: usize,
    /// defines if embedder is initialized by a diffusion map step. default to true
    pub dmap_init: bool,
    /// exponent β used in defining edge weight in original graph. 0.5 or 1. default to 1.
    pub beta: f64,
    /// exponent used in embedded space, default 1.
    pub b: f64,
    /// embedded scale factor. default to 1.
    pub scale_rho: f64,
    /// initial gradient step, default to 2.
    pub grad_step: f64,
    /// nb sampling by edge in gradient step. default = 10
    pub nb_sampling_by_edge: usize,
    /// number of gradient batches. default to 20
    pub nb_grad_batch: usize,
    /// the number of gradient batch in hierarchical case is nb_grad_batch multiplied by grad_factor.
    /// As the first iterations run on few points we can do more iterations. Default is 4.
    pub grad_factor: usize,
    /// if layer > 0 means we have hierarchical initialization
    pub hierarchy_layer: usize,
    /// To do negative sampling of nodes using hubness weights as node distribution, set it to true.  
    /// Default is false.  
    /// It improves slightly the quality estimated by [quality estimator](super::embedder::Embedder::get_quality_estimate_from_edge_length())
    pub hubness_weighting: bool,
} // end of EmbedderParams
104
105impl EmbedderParams {
106    #[allow(clippy::should_implement_trait)]
107    pub fn default() -> Self {
108        let asked_dim = 2;
109        let dmap_init = true;
110        let beta = 1.;
111        let b = 1.;
112        let grad_step = 2.;
113        let nb_sampling_by_edge = 10;
114        let scale_rho = 1.;
115        let nb_grad_batch = 20;
116        let grad_factor: usize = 4;
117        let hierarchy_layer = 0;
118        let hubness_weighting = false;
119        EmbedderParams {
120            asked_dim,
121            dmap_init,
122            beta,
123            b,
124            scale_rho,
125            grad_step,
126            nb_sampling_by_edge,
127            nb_grad_batch,
128            grad_factor,
129            hierarchy_layer,
130            hubness_weighting,
131        }
132    }
133
134    pub fn log(&self) {
135        log::info!("EmbedderParams");
136        log::info!("\t asked dim : {}", self.asked_dim);
137        log::info!("\t gradient step : {}", self.grad_step);
138        log::info!("\t edge exponent in original graph : {} ", self.beta);
139        log::info!("\t nb sampling by edge : {}", self.nb_sampling_by_edge);
140        log::info!("\t beta : {}", self.beta);
141        log::info!("\t scale factor : {}", self.scale_rho);
142        log::info!("\t number of gradient batch : {}", self.nb_grad_batch);
143        log::info!(
144            "\t factor for nbgradient batch in first hierarchical pass is  : {}",
145            self.grad_factor
146        );
147        log::info!("\t hierarchy layer  : {}", self.hierarchy_layer);
148        log::info!("\t hubness weighting : {}", self.hubness_weighting);
149    }
150
151    /// set to false if random initialization is preferred
152    pub fn set_dmap_init(&mut self, val: bool) {
153        self.dmap_init = val;
154    }
155
156    /// set the number of gradient batch. At each batch each edge is sampled nb_sampling_by_edge times.
157    /// default to 20
158    pub fn set_nb_gradient_batch(&mut self, nb_batch: usize) {
159        self.nb_grad_batch = nb_batch;
160    }
161
162    /// sets the dimension for data embedding. Default to 2
163    pub fn set_dim(&mut self, dim: usize) {
164        self.asked_dim = dim;
165    }
166
167    /// sets the number of time each edge should be sampled in a gradient batch. Default to 10
168    pub fn set_nb_edge_sampling(&mut self, nb_sample_by_edge: usize) {
169        self.nb_sampling_by_edge = nb_sample_by_edge;
170    }
171
172    /// get asked embedding dimension
173    pub fn get_dimension(&self) -> usize {
174        self.asked_dim
175    }
176
177    pub fn set_hierarchy_layer(&mut self, layer: usize) {
178        self.hierarchy_layer = layer;
179    }
180
181    pub fn get_hierarchy_layer(&self) -> usize {
182        self.hierarchy_layer
183    }
184} // end of impl EmbedderParams