// annembed/embedparams.rs

//! This module defines parameters for ann embedding.
//!

#[cfg_attr(doc, katexit::katexit)]
/// It is necessary to describe briefly the model used in the embedding:
///
/// ## Definition of the weight of an edge of the graph to embed
///
/// First we define the local scale $\rho$ around a point.
/// It is defined as the mean of the distances of points to their nearest neighbour.
/// The points taken into account to define $\rho$ are the node we consider and
/// all its knbn neighbours. So we compute the mean of distances to nearest neighbours
/// over knbn + 1 points around the current point.
///
/// Let ($d_{i}$) be the distances of the neighbours of a node n, sorted in increasing order for i = 0..k:
///     $$w_{i} = \exp\left(- \left(\frac{d_{i} - d_{0}}{S \cdot \rho}\right)^{\beta} \right)$$
///
/// S is a scale factor modulating $\rho$.
/// After that, the weights are normalized to a probability distribution.
///
/// So before normalization $w_{0}$ is always equal to 1. Increasing $\beta$ to 2. makes the weights $w_{i}$ decrease faster.
/// *The least weight of an edge must not go under $10^{-5}$, to limit the range of weights and avoid Svd numeric difficulties*.
/// The code stops with an error in this case.
/// So after normalization the range of weights from $w_{0}$ to $w_{k}$ is larger.
/// Reducing S has a similar effect, but playing with both $\beta$ and the scale adjustment must not violate the range constraint on weights.
///
/// It must be noted that setting the scale as described before and renormalizing to get a probability distribution
/// gives a perplexity nearly equal to the number of neighbours.
/// This can be verified by using the logging (implemented with the crates **env_logger** and **log**) and setting
/// RUST_LOG=annembed=INFO in your environment.
/// Then quantile summaries are given for the distributions of edge distances, edge weights, and perplexity
/// of nodes. This helps adjusting the parameters $\beta$ and Scale, and shows their impact on these quantiles.
///
/// Default values:
///
///  $\beta = 1$ so that we have exponential weights similar to Umap.
///
///  $S = 0.5$
///
/// But it is possible to set $\beta$ to 2. to get more Gaussian weights, or to reduce it to 0.5 and adjust S to respect the constraints on edge weights.
///
/// ## Definition of the weight of an edge of the embedded graph
///
/// The embedded edge weight has the usual expression:
/// $$ w(x,y) = \frac{1}{1 + \left\lVert (x - y)/a_{x} \right\rVert^{2b}} $$
///
/// By default b = 1.
/// The coefficient $a_{x}$ is deduced from the scale coefficient in the original space, with some
/// restriction to avoid too large fluctuations.
///
///
///
/// - Initial step of the gradient and number of batches
///
/// A number of batches around 10-20 seems sufficient for the Mnist digits data.
/// The initial gradient step $\gamma_{0}$ can be chosen around 1. (in the range 1/5 ... 5.).
/// Reasonably it should satisfy nb_batch $\cdot \gamma_{0} < 1$.
///
/// - asked_dimension : default is set to 2.
///
/// ## The optimization of the embedding
///
/// The embedding is optimized by minimizing the (Shannon, at present time) cross entropy
/// between the distributions of original and embedded edge weights. This minimization is done
/// by a standard (multithreaded) stochastic gradient with negative sampling for the unobserved edges
/// (see [Mnih-Teh](https://arxiv.org/abs/1206.6426) or
/// [Mikolov](https://proceedings.neurips.cc/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf)).
///
/// The number of negative edges sampled is set to a fixed value of 5.
///
/// - expression of the gradient
///
///
/// Here are the main parameters driving the embedding:
#[derive(Debug, Clone, Copy)]
pub struct EmbedderParams {
    /// embedding dimension : default to 2
    pub asked_dim: usize,
    /// defines if embedder is initialized by a diffusion map step. default to true
    pub dmap_init: bool,
    /// exponent used in defining edge weight in original graph. 0.5 or 1.
    pub beta: f64,
    /// exponent used in embedded space, default 1.
    pub b: f64,
    /// embedded scale factor. default to 1.
    pub scale_rho: f64,
    /// initial gradient step, default to 2.
    pub grad_step: f64,
    /// nb sampling by edge in gradient step. default = 10
    pub nb_sampling_by_edge: usize,
    /// number of gradient batches. default to 20 (matches `default()`)
    pub nb_grad_batch: usize,
    /// the number of gradient batches in the hierarchical case is nb_grad_batch multiplied by grad_factor.
    /// As the first iterations run on few points we can do more iterations. Default is 4.
    pub grad_factor: usize,
    /// if layer > 0 means we have hierarchical initialization
    pub hierarchy_layer: usize,
} // end of EmbedderParams

101impl EmbedderParams {
102    #[allow(clippy::should_implement_trait)]
103    pub fn default() -> Self {
104        let asked_dim = 2;
105        let dmap_init = true;
106        let beta = 1.;
107        let b = 1.;
108        let grad_step = 2.;
109        let nb_sampling_by_edge = 10;
110        let scale_rho = 1.;
111        let nb_grad_batch = 20;
112        let grad_factor: usize = 4;
113        let hierarchy_layer = 0;
114        EmbedderParams {
115            asked_dim,
116            dmap_init,
117            beta,
118            b,
119            scale_rho,
120            grad_step,
121            nb_sampling_by_edge,
122            nb_grad_batch,
123            grad_factor,
124            hierarchy_layer,
125        }
126    }
127
128    pub fn log(&self) {
129        log::info!("EmbedderParams");
130        log::info!("\t asked dim : {}", self.asked_dim);
131        log::info!("\t gradient step : {}", self.grad_step);
132        log::info!("\t edge exponent in original graph : {} ", self.beta);
133        log::info!("\t nb sampling by edge : {}", self.nb_sampling_by_edge);
134        log::info!("\t beta : {}", self.beta);
135        log::info!("\t scale factor : {}", self.scale_rho);
136        log::info!("\t number of gradient batch : {}", self.nb_grad_batch);
137        log::info!(
138            "\t factor for nbgradient batch in first hierarchical pass is  : {}",
139            self.grad_factor
140        );
141        log::info!("\t hierarchy layer  : {}", self.hierarchy_layer);
142    }
143
144    /// set to false if random initialization is preferred
145    pub fn set_dmap_init(&mut self, val: bool) {
146        self.dmap_init = val;
147    }
148
149    /// set the number of gradient batch. At each batch each edge is sampled nb_sampling_by_edge times.
150    /// default to 20
151    pub fn set_nb_gradient_batch(&mut self, nb_batch: usize) {
152        self.nb_grad_batch = nb_batch;
153    }
154
155    /// sets the dimension for data embedding. Default to 2
156    pub fn set_dim(&mut self, dim: usize) {
157        self.asked_dim = dim;
158    }
159
160    /// sets the number of time each edge should be sampled in a gradient batch. Default to 10
161    pub fn set_nb_edge_sampling(&mut self, nb_sample_by_edge: usize) {
162        self.nb_sampling_by_edge = nb_sample_by_edge;
163    }
164
165    /// get asked embedding dimension
166    pub fn get_dimension(&self) -> usize {
167        self.asked_dim
168    }
169
170    pub fn set_hierarchy_layer(&mut self, layer: usize) {
171        self.hierarchy_layer = layer;
172    }
173
174    pub fn get_hierarchy_layer(&self) -> usize {
175        self.hierarchy_layer
176    }
177} // end of impl EmbedderParams