1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
use num_traits::{FromPrimitive, Num, ToPrimitive};
use crate::common::knn::{neighbors, Neighbor};
/// K-nearest-neighbors regressor.
///
/// Stores the training set verbatim (`fit` is lazy); prediction averages the
/// labels of the `k` nearest training points, optionally weighted by inverse
/// distance. `D` is the feature scalar type, `L` the label type.
pub struct KNNRegression<D, L> {
/// Number of neighbors consulted per prediction; must be non-zero.
k: usize,
/// Training feature rows, one inner `Vec<D>` per sample.
data: Vec<Vec<D>>,
/// Training labels, parallel to `data` (same index = same sample).
labels: Vec<L>,
/// Neighbor weighting scheme: "uniform" or "distance" (stringly-typed by design of the public API).
weight_type: String,
/// Distance metric name forwarded to `common::knn::neighbors`, e.g. "euclidean" or "manhattan".
distance_metric: String,
}
impl<D, L> KNNRegression<D, L>
where
    D: Num + Copy + Clone + PartialOrd + ToPrimitive,
    L: Num + Copy + Clone + PartialOrd + ToPrimitive + FromPrimitive,
{
    /// Creates a new KNNRegression with a specified value of k.
    ///
    /// # Arguments
    /// * `k` - The number of neighbors to consider for regression. Must be greater than zero.
    ///
    /// # Panics
    /// This function will panic if `k` is zero.
    ///
    /// # Examples
    /// ```
    /// use rusty_science::regression::KNNRegression;
    /// let knn = KNNRegression::<f64, f64>::new(3);
    /// ```
    pub fn new(k: usize) -> Self {
        // `k` is unsigned, so zero is the only invalid value; `k <= 0` would
        // be an always-partially-redundant comparison on a usize.
        if k == 0 {
            panic!("K cannot be zero");
        }
        Self {
            k,
            data: vec![],
            labels: vec![],
            weight_type: "uniform".to_string(),
            distance_metric: "euclidean".to_string(),
        }
    }

    /// Sets the method that will determine the weights for neighbors, either 'uniform' or 'distance'.
    ///
    /// The value is matched case-insensitively and stored in lowercase.
    ///
    /// # Arguments
    /// * `weight_type` - A string specifying the weight type: either 'uniform' or 'distance'.
    ///
    /// # Panics
    /// This function will panic if an unsupported weight type is provided.
    ///
    /// # Examples
    /// ```
    /// use rusty_science::regression::KNNRegression;
    /// let mut knn = KNNRegression::<f64, f64>::new(3);
    /// knn.set_weight_type("uniform".to_string());
    /// ```
    pub fn set_weight_type(&mut self, weight_type: String) {
        // Lowercase once, and store the normalized form. The original stored
        // the caller's casing, so "Distance" passed validation here but then
        // failed the exact `== "distance"` check in `_predict`, silently
        // degrading to uniform weighting.
        let normalized = weight_type.to_lowercase();
        if normalized == "uniform" || normalized == "distance" {
            self.weight_type = normalized;
        } else {
            panic!("Unsupported or unknown weight type, use uniform or distance");
        }
    }

    /// Sets the distance metric to be used for finding neighbors.
    ///
    /// The value is not validated here; it is passed through to the neighbor
    /// search, which is responsible for recognizing the metric name.
    ///
    /// # Arguments
    /// * `distance_metric` - A string specifying the distance metric, e.g., 'euclidean' or 'manhattan'.
    ///
    /// # Examples
    /// ```
    /// use rusty_science::regression::KNNRegression;
    /// let mut knn = KNNRegression::<f64, f64>::new(3);
    /// knn.set_distance_metrics("manhattan".to_string());
    /// ```
    pub fn set_distance_metrics(&mut self, distance_metric: String) {
        self.distance_metric = distance_metric;
    }

    /// Fits the regression with the training data and labels.
    ///
    /// KNN is a lazy learner: this simply stores the data; all work happens
    /// at prediction time. `data` and `labels` are expected to be parallel
    /// (same length, same ordering).
    ///
    /// # Arguments
    /// * `data` - A vector of vectors containing the training data points.
    /// * `labels` - A vector of labels corresponding to the training data points.
    ///
    /// # Examples
    /// ```
    /// use rusty_science::regression::KNNRegression;
    /// let mut knn = KNNRegression::<f64, f64>::new(3);
    /// let data = vec![vec![1.0, 2.0], vec![2.0, 3.0], vec![3.0, 4.0]];
    /// let labels = vec![0.72, 1.0, 0.26];
    /// knn.fit(data, labels);
    /// ```
    pub fn fit(&mut self, data: Vec<Vec<D>>, labels: Vec<L>) {
        self.data = data;
        self.labels = labels;
    }

    /// Predicts the label for a given target data point.
    ///
    /// # Arguments
    /// * `target` - A vector representing the features of the data point to be classified.
    ///
    /// # Returns
    /// * The predicted label (weighted or unweighted mean of the k nearest labels), converted back to `L`.
    ///
    /// # Examples
    /// ```
    /// use rusty_science::regression::KNNRegression;
    /// let mut knn = KNNRegression::<f64, f64>::new(3);
    /// let data = vec![vec![1.0, 2.0], vec![2.0, 3.0], vec![3.0, 4.0]];
    /// let labels = vec![0.72, 1.0, 0.26];
    /// knn.fit(data, labels);
    /// let prediction = knn.predict(vec![2.5, 3.5]);
    /// println!("Predicted label: {}", prediction);
    /// ```
    pub fn predict(&self, target: Vec<D>) -> L {
        self._predict(target)
    }

    /// Shared prediction core: finds the k nearest neighbors and averages
    /// their labels, weighted by inverse distance when weight_type is
    /// "distance".
    fn _predict(&self, target: Vec<D>) -> L {
        let calculate_distance = self.weight_type == "distance";
        let neighbors: Vec<Neighbor<D, L>> = neighbors(
            self.data.clone(),
            Some(self.labels.clone()),
            Some(target),
            self.k,
            self.distance_metric.clone(),
            calculate_distance,
        );
        let mut weighted_sum = 0.0_f64;
        let mut total_weight = 0.0_f64;
        for neighbor in neighbors.iter() {
            let label = neighbor.label.unwrap();
            let distance = neighbor.distance_to_target;
            // Inverse-distance weight; an exact match (distance == 0.0)
            // deliberately falls back to weight 1.0 rather than dominating.
            let weight = if calculate_distance && distance != 0.0 {
                // Epsilon guards against division blow-up for tiny distances.
                1.0 / (distance + 1e-8)
            } else {
                1.0
            };
            // Accumulate in f64 regardless of L for numerical headroom.
            let label_f64 = label.to_f64().expect("Failed to convert label to f64");
            weighted_sum += weight * label_f64;
            total_weight += weight;
        }
        // Compute weighted average; fall back to a plain mean if all weights
        // summed to zero (defensive — weights above are strictly positive).
        let avg_label_f64 = if total_weight == 0.0 {
            let sum_labels: f64 = neighbors
                .iter()
                .map(|neighbor| neighbor.label.unwrap().to_f64().unwrap())
                .sum();
            sum_labels / neighbors.len() as f64
        } else {
            weighted_sum / total_weight
        };
        // Convert the average back to L
        L::from_f64(avg_label_f64).expect("Failed to convert average label from f64")
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Constructor should start with empty training state and the documented
// defaults ("uniform" weights, "euclidean" metric).
#[test]
fn test_new_knn_regression() {
let knn = KNNRegression::<f64, f64>::new(3);
assert_eq!(knn.k, 3);
assert_eq!(knn.data.len(), 0);
assert_eq!(knn.labels.len(), 0);
assert_eq!(knn.weight_type, "uniform");
assert_eq!(knn.distance_metric, "euclidean");
}
// fit() is lazy: it must store data and labels verbatim.
#[test]
fn test_fit() {
let mut knn = KNNRegression::<f64, f64>::new(3);
let data = vec![vec![1.0, 2.0], vec![2.0, 3.0]];
let labels = vec![1.5, 2.5];
knn.fit(data.clone(), labels.clone());
assert_eq!(knn.data, data);
assert_eq!(knn.labels, labels);
}
// Both supported weight types are accepted and stored.
#[test]
fn test_set_weights() {
let mut knn = KNNRegression::<f64, f64>::new(3);
knn.set_weight_type("distance".to_string());
assert_eq!(knn.weight_type, "distance");
knn.set_weight_type("uniform".to_string());
assert_eq!(knn.weight_type, "uniform");
}
// Anything other than uniform/distance must panic.
#[should_panic]
#[test]
fn test_unsupported_weights() {
let mut knn = KNNRegression::<f64, f64>::new(3);
knn.set_weight_type("unsupported".to_string());
}
// Distance metric setter is a plain pass-through (no validation here).
#[test]
fn test_set_distance_metrics() {
let mut knn = KNNRegression::<f64, f64>::new(3);
knn.set_distance_metrics("manhattan".to_string());
assert_eq!(knn.distance_metric, "manhattan");
knn.set_distance_metrics("euclidean".to_string());
assert_eq!(knn.distance_metric, "euclidean");
}
// With labels 1..4 on the diagonal and target (2.5, 2.5), the mean of the
// 3 nearest labels must land between the two nearest labels 2.0 and 3.0.
#[test]
fn test_predict() {
let mut knn = KNNRegression::<f64, f64>::new(3);
let data = vec![
vec![1.0, 1.0],
vec![2.0, 2.0],
vec![3.0, 3.0],
vec![4.0, 4.0],
];
let labels = vec![1.0, 2.0, 3.0, 4.0];
knn.fit(data, labels);
let target = vec![2.5, 2.5];
let prediction = knn.predict(target);
assert!(prediction >= 2.0 && prediction <= 3.0);
}
// Explicitly re-setting the (default) uniform weighting after fit must not
// change the expected prediction range.
#[test]
fn test_knn_regression_non_default_weight() {
let mut knn = KNNRegression::<f64, f64>::new(3);
let data = vec![
vec![1.0, 1.0],
vec![2.0, 2.0],
vec![3.0, 3.0],
vec![4.0, 4.0],
];
let labels = vec![1.0, 2.0, 3.0, 4.0];
knn.fit(data, labels);
knn.set_weight_type(String::from("uniform"));
let target = vec![2.5, 2.5];
let prediction = knn.predict(target);
assert!(prediction >= 2.0 && prediction <= 3.0);
}
// k == 0 is rejected at construction time.
#[test]
#[should_panic]
fn test_knn_zero_k(){
let _ = KNNRegression::<f64, f64>::new(0);
}
// Smoke test: distance-weighted prediction runs without panicking,
// including the inverse-distance weight path in _predict.
#[test]
fn set_weight_type() {
let data = vec![
vec![1.0, 1.0],
vec![2.0, 2.0],
vec![3.0, 3.0],
vec![4.0, 4.0],
];
let labels = vec![1.0, 2.0, 3.0, 4.0];
let mut knn = KNNRegression::<f64, f64>::new(3);
knn.set_weight_type("distance".to_string());
knn.fit(data, labels);
knn.predict(vec![1.0, 2.0]);
}
}