// annembed/embedparams.rs
//! This module defines parameters for ann embedding.
//!
#[cfg_attr(doc, katexit::katexit)]
/// It is necessary to describe briefly the model used in the embedding:
///
/// ## Definition of the weight of an edge of the graph to embed
///
/// First we define the local scale $\rho$ around a point.
/// It is defined as the mean of distances of points to their nearest neighbour.
/// The points taken into account to define $\rho$ are the node we consider and
/// all its knbn neighbours. So we compute the mean of distances to nearest neighbours
/// on knbn + 1 points around the current point.
///
/// Let ($d_{i}$) be the sorted distances in increasing order of neighbours for i=0..k of a node n,
/// $$w_{i} = \exp\left(- \left(\frac{d_{i} - d_{0}}{S * \rho}\right)^{\beta} \right)$$
///
/// S is a scale factor modulating $\rho$.
/// After that weights are normalized to a probability distribution.
///
/// So before normalization $w_{0}$ is always equal to 1. Augmenting $\beta$ to 2. makes the weight $w_{i}$ decrease faster.
/// *The least weight of an edge must not go under $10^{-5}$ to limit the range of weights and avoid Svd numeric difficulties*.
/// The code stops with an error in this case.
/// So after normalization the range of weights from $w_{0}$ to $w_{k}$ is larger.
/// Reducing S has a similar effect, but playing with both $\beta$ and the scale adjustment must not violate the range constraint on weights.
///
/// It must be noted that setting the scale as described before and renormalizing to get a probability distribution
/// gives a perplexity nearly equal to the number of neighbours.
/// This can be verified by using the logging (implemented using the crates **env_logger** and **log**) and setting
/// RUST_LOG=annembed=INFO in your environment.
/// Then quantile summaries are given for the distributions of edge distances, edge weights, and perplexity
/// of nodes. This helps adjusting the parameters $\beta$ and Scale and shows their impact on these quantiles.
///
///
/// Default values:
///
/// $\beta = 1$ so that we have exponential weights similar to Umap.
///
/// $S = 0.5$
///
/// But it is possible to set $\beta$ to 2. to get a more gaussian weight, or reduce it to 0.5 and adjust S to respect the constraints on edge weights.
///
/// ## Definition of the weight of an edge of the embedded graph
///
/// The embedded edge has the usual expression:
/// $$ w(x,y) = \frac{1}{1 + \left\| (x - y)/a_{x} \right\|^{2b}} $$
///
/// By default b = 1.
/// The coefficient $a_{x}$ is deduced from the scale coefficient in the original space with some
/// restriction to avoid too large fluctuations.
///
///
///
/// - Initial step of the gradient and number of batches
///
/// A number of batches around 10-20 seems sufficient for the Mnist digits data.
/// The initial gradient step $\gamma_{0}$ can be chosen around 1. (in the range 1/5 ... 5.).
/// Reasonably it should satisfy $\text{nb\_batch} \cdot \gamma_{0} < 1$
///
/// - asked_dimension : default is set to 2.
///
/// ## The optimization of the embedding
///
/// The embedding is optimized by minimizing the (Shannon at present time) cross entropy
/// between the distributions of original and embedded edge weights. This minimization is done
/// by a standard (multithreaded) stochastic gradient with negative sampling for the unobserved edges
/// (see [Mnih-Teh](https://arxiv.org/abs/1206.6426) or
/// [Mikolov](https://proceedings.neurips.cc/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf))
///
/// The number of negative edge samplings is set to a fixed value 5.
///
/// - expression of the gradient
///
///
/// Here are the main parameters driving the embedding.
#[derive(Clone, Copy)]
pub struct EmbedderParams {
    /// embedding dimension : default to 2
    pub asked_dim: usize,
    /// defines if embedder is initialized by a diffusion map step. default to true
    pub dmap_init: bool,
    /// exponent used in defining edge weight in original graph. 0.5 or 1.
    pub beta: f64,
    /// exponent used in embedded space, default 1.
    pub b: f64,
    /// embedded scale factor. default to 1.
    pub scale_rho: f64,
    /// initial gradient step, default to 2.
    pub grad_step: f64,
    /// nb sampling by edge in gradient step. default = 10
    pub nb_sampling_by_edge: usize,
    /// number of gradient batches. default to 20
    pub nb_grad_batch: usize,
    /// the number of gradient batches in the hierarchical case is nb_grad_batch multiplied by grad_factor.
    /// As the first iterations run on few points we can do more iterations. Default is 4.
    pub grad_factor: usize,
    /// if layer > 0 means we have hierarchical initialization
    pub hierarchy_layer: usize,
    /// To do negative sampling of nodes using hubness weights as node distribution, set it to true.
    /// Default is false.
    /// It improves slightly the quality estimated by [quality estimator](super::embedder::Embedder::get_quality_estimate_from_edge_length())
    pub hubness_weighting: bool,
} // end of EmbedderParams
104
105impl EmbedderParams {
106 #[allow(clippy::should_implement_trait)]
107 pub fn default() -> Self {
108 let asked_dim = 2;
109 let dmap_init = true;
110 let beta = 1.;
111 let b = 1.;
112 let grad_step = 2.;
113 let nb_sampling_by_edge = 10;
114 let scale_rho = 1.;
115 let nb_grad_batch = 20;
116 let grad_factor: usize = 4;
117 let hierarchy_layer = 0;
118 let hubness_weighting = false;
119 EmbedderParams {
120 asked_dim,
121 dmap_init,
122 beta,
123 b,
124 scale_rho,
125 grad_step,
126 nb_sampling_by_edge,
127 nb_grad_batch,
128 grad_factor,
129 hierarchy_layer,
130 hubness_weighting,
131 }
132 }
133
134 pub fn log(&self) {
135 log::info!("EmbedderParams");
136 log::info!("\t asked dim : {}", self.asked_dim);
137 log::info!("\t gradient step : {}", self.grad_step);
138 log::info!("\t edge exponent in original graph : {} ", self.beta);
139 log::info!("\t nb sampling by edge : {}", self.nb_sampling_by_edge);
140 log::info!("\t beta : {}", self.beta);
141 log::info!("\t scale factor : {}", self.scale_rho);
142 log::info!("\t number of gradient batch : {}", self.nb_grad_batch);
143 log::info!(
144 "\t factor for nbgradient batch in first hierarchical pass is : {}",
145 self.grad_factor
146 );
147 log::info!("\t hierarchy layer : {}", self.hierarchy_layer);
148 log::info!("\t hubness weighting : {}", self.hubness_weighting);
149 }
150
151 /// set to false if random initialization is preferred
152 pub fn set_dmap_init(&mut self, val: bool) {
153 self.dmap_init = val;
154 }
155
156 /// set the number of gradient batch. At each batch each edge is sampled nb_sampling_by_edge times.
157 /// default to 20
158 pub fn set_nb_gradient_batch(&mut self, nb_batch: usize) {
159 self.nb_grad_batch = nb_batch;
160 }
161
162 /// sets the dimension for data embedding. Default to 2
163 pub fn set_dim(&mut self, dim: usize) {
164 self.asked_dim = dim;
165 }
166
167 /// sets the number of time each edge should be sampled in a gradient batch. Default to 10
168 pub fn set_nb_edge_sampling(&mut self, nb_sample_by_edge: usize) {
169 self.nb_sampling_by_edge = nb_sample_by_edge;
170 }
171
172 /// get asked embedding dimension
173 pub fn get_dimension(&self) -> usize {
174 self.asked_dim
175 }
176
177 pub fn set_hierarchy_layer(&mut self, layer: usize) {
178 self.hierarchy_layer = layer;
179 }
180
181 pub fn get_hierarchy_layer(&self) -> usize {
182 self.hierarchy_layer
183 }
184} // end of impl EmbedderParams