use rand::seq::SliceRandom;
use crate::panic_untrained;
use num::Num;
use num_traits::{NumCast, ToPrimitive};
use rand::thread_rng;
/// A single-layer perceptron for regression over feature vectors of
/// numeric type `D`, trained with gradient descent in `fit`.
/// Features are min/max normalized during `fit`, and the learned bounds
/// are kept so `predict` can apply the same scaling.
pub struct RegressionPerceptron<D> {
// Display name passed to the `panic_untrained!` macro in `predict`.
model_name: String,
// Regularization kind set via `set_penalty` (defaults to "l2").
pub(crate) penalty: String,
// Regularization strength set via `set_alpha` (defaults to 0.0001).
pub(crate) alpha: f32,
// Whether `fit` shuffles the training set before the epoch loop.
pub(crate) shuffle: bool,
// Normalized training rows stored by `fit`.
data: Vec<Vec<D>>,
// Target values paired row-for-row with `data`.
labels: Vec<f64>,
// One learned weight per feature column.
weights: Vec<f64>,
// Learned intercept term.
bias: f64,
// Gradient-descent step size (defaults to 0.01).
learning_rate: f64,
// Number of passes over the training data per `fit` call.
epochs: usize,
// Per-feature minimums learned in `fit`, reused to scale `predict` input.
feature_mins: Vec<f64>,
// Per-feature maximums learned in `fit`, reused to scale `predict` input.
feature_maxs: Vec<f64>,
}
impl<D> RegressionPerceptron<D>
where
D: Num + Copy + Clone + PartialOrd + ToPrimitive + NumCast,
{
/// Creates an untrained perceptron with the library defaults:
/// "l2" penalty, alpha 0.0001, shuffling enabled, learning rate 0.01,
/// and 5 training epochs. All learned state starts empty/zero.
pub fn new() -> Self {
    Self {
        model_name: String::from("Regression Perceptron"),
        penalty: String::from("l2"),
        alpha: 0.0001,
        shuffle: true,
        learning_rate: 0.01,
        epochs: 5,
        data: vec![],
        labels: vec![],
        weights: vec![],
        bias: 0.0,
        feature_mins: Vec::new(),
        feature_maxs: Vec::new(),
    }
}
/// Sets the regularization penalty kind (defaults to "l2" in `new`).
pub fn set_penalty(&mut self, penalty: String) {
self.penalty = penalty;
}
/// Sets the regularization strength `alpha` (defaults to 0.0001).
pub fn set_alpha(&mut self, alpha: f32) {
self.alpha = alpha;
}
/// Enables or disables shuffling of the training set in `fit`.
pub fn set_shuffle(&mut self, shuffle: bool) {
self.shuffle = shuffle;
}
/// Sets the number of passes over the training data in `fit`.
pub fn set_epochs(&mut self, epochs: usize) {
self.epochs = epochs;
}
/// Sets the gradient-descent step size (defaults to 0.01).
pub fn set_learning_rate(&mut self, lr: f64) {
self.learning_rate = lr;
}
/// Trains the perceptron on `data` with targets `labels`.
///
/// Features are min/max normalized per column (the bounds are kept so
/// `predict` can apply the same scaling), weights and bias are reset,
/// the training set is optionally shuffled, and `epochs` passes of
/// gradient descent are run.
///
/// Silently returns on empty input. If `data` and `labels` differ in
/// length, only the common prefix is used — the previous code indexed
/// `labels` by data index during shuffling and could panic out of
/// bounds. Ragged rows longer than the first row no longer panic
/// either; extra columns are ignored for min/max tracking.
pub fn fit(&mut self, mut data: Vec<Vec<D>>, labels: Vec<f64>) {
    if data.is_empty() || labels.is_empty() {
        return;
    }
    // Keep data and labels the same length so every later index into
    // either collection is in bounds.
    let n = data.len().min(labels.len());
    data.truncate(n);
    let mut labels = labels;
    labels.truncate(n);

    let feature_count = data[0].len();
    let mut mins = vec![f64::MAX; feature_count];
    let mut maxs = vec![f64::MIN; feature_count];
    for row in &data {
        // `take` guards against rows longer than the first row, which
        // would otherwise index `mins`/`maxs` out of bounds.
        for (i, val) in row.iter().take(feature_count).enumerate() {
            let v = val.to_f64().unwrap_or(0.0);
            mins[i] = mins[i].min(v);
            maxs[i] = maxs[i].max(v);
        }
    }
    // Min/max-normalize each feature in place; constant columns map to 0.
    for row in &mut data {
        for (i, val) in row.iter_mut().take(feature_count).enumerate() {
            let v = val.to_f64().unwrap_or(0.0);
            let range = maxs[i] - mins[i];
            let scaled = if range > 0.0 { (v - mins[i]) / range } else { 0.0 };
            *val = num_traits::cast(scaled).unwrap();
        }
    }
    self.feature_mins = mins;
    self.feature_maxs = maxs;
    self.weights = vec![0.0; feature_count];
    self.bias = 0.0;
    self.data = data;
    self.labels = labels;
    if self.shuffle {
        // Shuffle a permutation of indices, then reorder rows and
        // labels together so each row keeps its target.
        let mut indices: Vec<usize> = (0..self.data.len()).collect();
        indices.shuffle(&mut thread_rng());
        let shuffled_data: Vec<Vec<D>> =
            indices.iter().map(|&i| self.data[i].clone()).collect();
        let shuffled_labels: Vec<f64> =
            indices.iter().map(|&i| self.labels[i]).collect();
        self.data = shuffled_data;
        self.labels = shuffled_labels;
    }
    for _ in 0..self.epochs {
        self._fit();
    }
}
/// Runs one gradient-descent pass over the stored training set,
/// updating `weights` and `bias` in place.
///
/// Fix: `penalty` and `alpha` were stored and settable but never used
/// during training; the regularization gradient is now applied —
/// "l2" shrinks each weight proportionally, "l1" by a constant
/// subgradient (0 at exactly zero), any other value applies none.
fn _fit(&mut self) {
    // Hoist loop-invariant reads; alpha is stored as f32 but all
    // gradient arithmetic is done in f64.
    let alpha = f64::from(self.alpha);
    let lr = self.learning_rate;
    for (row, &label) in self.data.iter().zip(self.labels.iter()) {
        let xs: Vec<f64> = row.iter().map(|v| v.to_f64().unwrap_or(0.0)).collect();
        let prediction: f64 = self
            .weights
            .iter()
            .zip(xs.iter())
            .map(|(w, x)| w * x)
            .sum::<f64>()
            + self.bias;
        let error = label - prediction;
        for (w, &x) in self.weights.iter_mut().zip(xs.iter()) {
            let penalty_grad = match self.penalty.as_str() {
                "l2" => alpha * *w,
                // l1 subgradient is 0 at w == 0 (signum(0.0) is 1.0,
                // which would wrongly push untouched weights negative).
                "l1" if *w != 0.0 => alpha * w.signum(),
                _ => 0.0,
            };
            *w += lr * (error * x - penalty_grad);
        }
        self.bias += lr * error;
    }
}
/// Predicts a target value for `sample` using the trained weights.
///
/// The sample is rescaled with the min/max bounds learned during
/// `fit` before the dot product is taken. Returns `f64::NAN` (with a
/// warning on stderr) when the sample's dimensionality does not match
/// the trained weights, and `0.0` when the accumulated sum is
/// infinite or NaN.
///
/// # Panics
/// Panics via `panic_untrained!` when the model has not been fitted.
pub fn predict(&self, sample: Vec<D>) -> f64 {
    panic_untrained!(self.labels.len() == 0, self.model_name);
    let mut scaled = Vec::with_capacity(sample.len());
    for (i, raw) in sample.into_iter().enumerate() {
        let value = raw.to_f64().unwrap_or(0.0);
        // Apply the normalization learned in `fit`; features beyond
        // the learned bounds pass through unscaled.
        let feature = if i < self.feature_mins.len() {
            let lo = self.feature_mins[i];
            let range = self.feature_maxs[i] - lo;
            if range > 0.0 { (value - lo) / range } else { 0.0 }
        } else {
            value
        };
        scaled.push(feature);
    }
    if scaled.len() != self.weights.len() {
        eprintln!("Warning: Feature dimensions don't match weight dimensions");
        return f64::NAN;
    }
    // Same summation order as training: dot product first, bias last.
    let prediction = self
        .weights
        .iter()
        .zip(scaled.iter())
        .map(|(w, x)| w * x)
        .sum::<f64>()
        + self.bias;
    if prediction.is_infinite() || prediction.is_nan() {
        eprintln!("Warning: Numerical overflow in prediction");
        return 0.0;
    }
    prediction
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fit y = 2x + 1 on three points and extrapolate to x = 4.
    #[test]
    fn test_basic_fit_and_predict() {
        let mut model = RegressionPerceptron::<f64>::new();
        model.set_penalty("l2".to_string());
        model.set_alpha(0.001);
        model.set_shuffle(true);
        model.set_learning_rate(0.01);
        model.set_epochs(500);
        let samples = vec![vec![1.0_f64], vec![2.0_f64], vec![3.0_f64]];
        let targets = vec![3.0, 5.0, 7.0];
        model.fit(samples, targets);
        let got = model.predict(vec![4.0_f64]);
        let want = 9.0;
        assert!(
            (got - want).abs() < 1.0,
            "Expected prediction near {}, got {}",
            want,
            got
        );
    }

    // Fit y = x1 + 2*x2 + 3 on three points with two features each.
    #[test]
    fn test_multiple_features() {
        let mut model = RegressionPerceptron::<f64>::new();
        model.set_learning_rate(0.01);
        model.set_epochs(500);
        let samples = vec![vec![1.0, 1.0], vec![2.0, 2.0], vec![3.0, 3.0]];
        // Targets are 1*x1 + 2*x2 + 3 for each row.
        let targets = vec![6.0, 9.0, 12.0];
        model.fit(samples, targets);
        let got = model.predict(vec![4.0, 4.0]);
        // 1*4 + 2*4 + 3
        let want = 15.0;
        assert!(
            (got - want).abs() < 1.0,
            "Expected prediction near {}, got {}",
            want,
            got
        );
    }

    // Integer feature vectors should train through the generic D = i32.
    #[test]
    fn test_with_integer_data() {
        let mut model = RegressionPerceptron::<i32>::new();
        model.set_learning_rate(0.01);
        model.set_epochs(500);
        let samples = vec![vec![1, 2], vec![2, 3], vec![3, 4]];
        // Targets are x1 + x2 + 1 for each row.
        let targets = vec![4.0, 6.0, 8.0];
        model.fit(samples, targets);
        let got = model.predict(vec![4, 5]);
        // 4 + 5 + 1
        let want = 10.0;
        assert!(
            (got - want).abs() < 1.0,
            "Expected prediction near {}, got {}",
            want,
            got
        );
    }

    // Calling predict on a never-fitted model must panic.
    #[test]
    #[should_panic]
    fn test_predict_before_fit() {
        let untrained = RegressionPerceptron::<f64>::new();
        untrained.predict(vec![1.0]);
    }
}