use ndarray::{Array2, arr2};
use rust_lstm::models::lstm_network::LSTMNetwork;
use rust_lstm::training::LSTMTrainer;
use rust_lstm::loss::MSELoss;
use rust_lstm::optimizers::Adam;
use std::fs::File;
use std::io::{BufRead, BufReader};
/// A single timestamped observation: one CSV row or one synthetic sample.
#[derive(Debug, Clone)]
struct DataPoint {
/// Raw timestamp string; used for display only, never parsed.
timestamp: String,
/// Feature values, ordered to match `CSVDataLoader::feature_names`.
values: Vec<f64>,
}
/// Holds the dataset plus the per-feature statistics needed for z-score
/// normalization.
struct CSVDataLoader {
/// All loaded/generated data points, in time order.
data: Vec<DataPoint>,
/// Column names for `DataPoint::values` (timestamp column excluded).
feature_names: Vec<String>,
/// Per-feature `(mean, std)` pairs; empty until `fit_normalizers` runs.
normalizers: Vec<(f64, f64)>, }
impl CSVDataLoader {
#[allow(dead_code)]
fn from_csv(file_path: &str, target_column: &str) -> std::io::Result<Self> {
let file = File::open(file_path)?;
let reader = BufReader::new(file);
let mut lines = reader.lines();
let header_line = lines.next().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::InvalidData, "Empty file")
})??;
let headers: Vec<String> = header_line.split(',')
.map(|s| s.trim().to_string())
.collect();
let _target_idx = headers.iter().position(|h| h == target_column)
.ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::InvalidData,
format!("Target column '{}' not found", target_column))
})?;
let mut data = Vec::new();
for line in lines {
let line = line?;
let values: Result<Vec<f64>, _> = line.split(',')
.enumerate()
.filter_map(|(i, s)| {
if i == 0 { None } else { Some(s.trim().parse::<f64>()) }
})
.collect();
match values {
Ok(vals) if !vals.is_empty() => {
let timestamp = line.split(',').next().unwrap_or("").to_string();
data.push(DataPoint { timestamp, values: vals });
},
_ => continue, }
}
let feature_names = headers[1..].to_vec();
Ok(Self {
data,
feature_names,
normalizers: Vec::new(),
})
}
fn generate_synthetic_sensor_data(days: usize) -> Self {
let mut data = Vec::new();
for i in 0..days * 24 { let hour_of_day = (i % 24) as f64;
let day_of_year = (i / 24 % 365) as f64;
let daily_temp_cycle = 5.0 * (2.0 * std::f64::consts::PI * hour_of_day / 24.0).cos();
let seasonal_temp_cycle = 15.0 * (2.0 * std::f64::consts::PI * day_of_year / 365.0).sin();
let temperature = 20.0 + daily_temp_cycle + seasonal_temp_cycle +
(rand::random::<f64>() - 0.5) * 3.0;
let humidity = 70.0 - (temperature - 20.0) * 1.5 +
(rand::random::<f64>() - 0.5) * 15.0;
let humidity = humidity.clamp(20.0, 95.0);
let pressure = 1013.25 + 10.0 * (day_of_year / 30.0).sin() +
(rand::random::<f64>() - 0.5) * 20.0;
let light = if hour_of_day >= 6.0 && hour_of_day <= 18.0 {
1000.0 * (std::f64::consts::PI * (hour_of_day - 6.0) / 12.0).sin() +
(rand::random::<f64>() - 0.5) * 200.0
} else {
(rand::random::<f64>() * 50.0).max(0.0)
};
let timestamp = format!("2024-{:03}-{:02}", day_of_year as u32 + 1, hour_of_day as u32);
data.push(DataPoint {
timestamp,
values: vec![temperature, humidity, pressure, light],
});
}
Self {
data,
feature_names: vec![
"temperature".to_string(),
"humidity".to_string(),
"pressure".to_string(),
"light".to_string()
],
normalizers: Vec::new(),
}
}
fn fit_normalizers(&mut self) {
let num_features = self.feature_names.len();
let mut sums = vec![0.0; num_features];
let mut sum_squares = vec![0.0; num_features];
let n = self.data.len() as f64;
for point in &self.data {
for (i, &value) in point.values.iter().enumerate() {
sums[i] += value;
sum_squares[i] += value * value;
}
}
self.normalizers = sums.iter().enumerate()
.map(|(i, &sum)| {
let mean = sum / n;
let variance = (sum_squares[i] / n) - (mean * mean);
let std = variance.sqrt().max(1e-8);
(mean, std)
})
.collect();
}
fn normalize(&self, point: &DataPoint) -> Array2<f64> {
let normalized: Vec<f64> = point.values.iter().enumerate()
.map(|(i, &value)| {
let (mean, std) = self.normalizers[i];
(value - mean) / std
})
.collect();
Array2::from_shape_vec((normalized.len(), 1), normalized).unwrap()
}
fn denormalize(&self, normalized_value: f64, feature_idx: usize) -> f64 {
let (mean, std) = self.normalizers[feature_idx];
normalized_value * std + mean
}
}
/// One-step-ahead forecaster: an LSTM over sliding windows of sensor data.
struct TimeSeriesPredictor {
/// Untrained network template; the trainer works on its own clone of it.
network: LSTMNetwork,
/// Set by `train`; `None` until training has run.
trainer: Option<LSTMTrainer<MSELoss, Adam>>,
/// Number of past time steps fed to the network per prediction.
sequence_length: usize,
/// Index into `DataPoint::values` of the feature being predicted.
target_feature: usize,
}
impl TimeSeriesPredictor {
    /// Builds an untrained predictor.
    ///
    /// * `input_features` — number of features per time step.
    /// * `sequence_length` — lookback window length.
    /// * `hidden_size` — LSTM hidden state size.
    /// * `target_feature` — index of the feature to forecast one step ahead.
    fn new(input_features: usize, sequence_length: usize, hidden_size: usize, target_feature: usize) -> Self {
        // Single-layer LSTM mapping input_features -> hidden_size.
        let network = LSTMNetwork::new(input_features, hidden_size, 1);
        Self {
            network,
            trainer: None,
            sequence_length,
            target_feature,
        }
    }

    /// Converts the dataset into `(inputs, targets)` sequence pairs.
    ///
    /// Each pair is built from a sliding window of `sequence_length + 1`
    /// consecutive points: the inputs are the normalized first
    /// `sequence_length` points, and each target is the normalized
    /// target-feature value one step ahead of the corresponding input.
    fn create_sequences(&self, data_loader: &CSVDataLoader) -> Vec<(Vec<Array2<f64>>, Vec<Array2<f64>>)> {
        // A zero-length lookback yields no usable pairs (matches the original,
        // which filtered out empty input sequences).
        if self.sequence_length == 0 {
            return Vec::new();
        }
        data_loader
            .data
            .windows(self.sequence_length + 1)
            .map(|window| {
                let inputs: Vec<Array2<f64>> = window[..self.sequence_length]
                    .iter()
                    .map(|point| data_loader.normalize(point))
                    .collect();
                // Targets are the same window shifted one step ahead,
                // normalized with the target feature's statistics.
                let (mean, std) = data_loader.normalizers[self.target_feature];
                let targets: Vec<Array2<f64>> = window[1..]
                    .iter()
                    .map(|point| {
                        let normalized = (point.values[self.target_feature] - mean) / std;
                        arr2(&[[normalized]])
                    })
                    .collect();
                (inputs, targets)
            })
            .collect()
    }

    /// Trains the model on sequences built from `data_loader`, holding out
    /// the final `validation_split` fraction (chronologically) for validation.
    fn train(&mut self, data_loader: &CSVDataLoader, validation_split: f64) {
        println!("📊 Creating training sequences...");
        let sequences = self.create_sequences(data_loader);
        // Chronological split: train on the earlier portion, validate on the
        // most recent sequences.
        let split_idx = ((sequences.len() as f64) * (1.0 - validation_split)) as usize;
        let (train_data, val_data) = sequences.split_at(split_idx);
        println!("🎯 Training on {} sequences, validating on {} sequences",
        train_data.len(), val_data.len());
        let loss_function = MSELoss;
        let optimizer = Adam::new(0.001);
        // The trainer takes its own copy of the network and is kept afterwards
        // for inference via `predict_next`.
        let mut trainer = LSTMTrainer::new(self.network.clone(), loss_function, optimizer);
        let mut config = rust_lstm::training::TrainingConfig::default();
        // Short demo run: 5 epochs, progress printed every 2.
        config.epochs = 5;
        config.print_every = 2;
        trainer = trainer.with_config(config);
        trainer.train(train_data, Some(val_data));
        self.trainer = Some(trainer);
        println!("✅ Time series model training completed!");
    }

    /// Predicts the next (denormalized) target-feature value from the most
    /// recent `sequence_length` points of `recent_data`.
    ///
    /// Returns `None` if the model has not been trained yet or `recent_data`
    /// is shorter than the lookback window.
    fn predict_next(&mut self, data_loader: &CSVDataLoader, recent_data: &[DataPoint]) -> Option<f64> {
        if recent_data.len() < self.sequence_length {
            return None;
        }
        let trainer = self.trainer.as_mut()?;
        // Use only the trailing `sequence_length` points as the input window.
        let start_idx = recent_data.len() - self.sequence_length;
        let inputs: Vec<Array2<f64>> = recent_data[start_idx..]
            .iter()
            .map(|point| data_loader.normalize(point))
            .collect();
        // The last step's output is the one-step-ahead forecast.
        trainer
            .predict(&inputs)
            .last()
            .map(|prediction| data_loader.denormalize(prediction[[0, 0]], self.target_feature))
    }
}
/// Entry point: builds a week of synthetic hourly IoT sensor data, trains an
/// LSTM temperature predictor on it, prints a handful of one-step-ahead
/// forecasts against the actual values, and finishes with summary statistics.
fn main() {
    println!("📈 Real Data Time Series Prediction with LSTM");
    println!("===============================================\n");

    // Seven days of hourly synthetic readings (168 points, 4 features).
    println!("📡 Generating synthetic IoT sensor data...");
    let mut loader = CSVDataLoader::generate_synthetic_sensor_data(7);
    println!("📊 Data loaded: {} data points with {} features",
    loader.data.len(),
    loader.feature_names.len());
    println!("Features: {:?}", loader.feature_names);

    // Show the first few rows so the data shape is visible.
    println!("\n📋 Sample data points:");
    for (idx, sample) in loader.data.iter().take(5).enumerate() {
        let formatted: Vec<String> =
            sample.values.iter().map(|v| format!("{:.2}", v)).collect();
        println!("Point {}: {} -> {:?}", idx + 1, sample.timestamp, formatted);
    }

    println!("\n🔧 Fitting data normalizers...");
    loader.fit_normalizers();

    // 12-hour lookback, 32 hidden units, predict feature 0 (temperature).
    let mut predictor = TimeSeriesPredictor::new(loader.feature_names.len(), 12, 32, 0);
    predictor.train(&loader, 0.2);

    // Forecast five consecutive hours using sliding 24-point input windows
    // drawn from the last 48 data points.
    println!("\n🔮 Making temperature predictions:");
    let tail = &loader.data[loader.data.len() - 48..];
    for hour in 24..29 {
        let window = &tail[hour - 24..hour];
        if let Some(predicted) = predictor.predict_next(&loader, window) {
            let actual = tail[hour].values[0];
            println!("Hour {}: Predicted={:.1}°C, Actual={:.1}°C, Error={:.1}°C",
            hour + 1, predicted, actual, (predicted - actual).abs());
        }
    }

    // Dataset-wide temperature statistics.
    let temperatures: Vec<f64> = loader.data.iter().map(|p| p.values[0]).collect();
    let mean_temp = temperatures.iter().sum::<f64>() / temperatures.len() as f64;
    let mut lowest = f64::INFINITY;
    let mut highest = f64::NEG_INFINITY;
    for &t in &temperatures {
        lowest = lowest.min(t);
        highest = highest.max(t);
    }
    println!("\n📈 Data statistics:");
    println!("Average temperature: {:.1}°C", mean_temp);
    println!("Temperature range: {:.1}°C to {:.1}°C", lowest, highest);
}