use serde::{Serialize, Deserialize};
use std::path::{PathBuf, Path};
use std::collections::HashMap;
use anyhow::{Result, anyhow};
use tokio::fs;
use crate::data_core::analyzer::DataFrameAnalyzer;
use polars::prelude::{DataFrame, SerReader, SerWriter};
use polars::io::ipc::{IpcReader, IpcWriter};
use uuid::Uuid;
/// Persistent analysis-session state.
///
/// The serde-visible fields are written to/read from a JSON workspace file by
/// `save_to_file` / `load_from_file`; the `#[serde(skip)]` fields are runtime-only
/// caches (the analyzer and per-analysis result frames) that are rebuilt or
/// recomputed after a load rather than serialized.
#[derive(Debug, Serialize, Deserialize)]
pub struct Workspace {
/// Path of the source data file the user loaded, if any.
pub loaded_file_path: Option<PathBuf>,
/// Path of the IPC file holding the cached DataFrame written by `save_to_file`.
pub cached_data_path: Option<PathBuf>,
/// Columns currently selected for analysis (insertion order, no duplicates).
pub active_columns: Vec<String>,
/// Per-column filter expressions, keyed by column name.
pub filters: HashMap<String, String>,
/// Free-form key/value analysis settings.
pub analysis_parameters: HashMap<String, String>,
/// Column name -> datatype name mapping (stored as strings for serialization).
pub column_datatypes: HashMap<String, String>,
/// Whether row identifiers form a continuous sequence.
/// NOTE(review): semantics inferred from the name only — confirm against callers.
pub ids_are_continuous: bool,
/// Runtime analyzer wrapping the loaded DataFrame; reconstructed on load.
#[serde(skip)]
pub analyzer: Option<DataFrameAnalyzer>,
/// Cached result frame for ranking; not persisted.
#[serde(skip)]
pub ranked_data: Option<DataFrame>,
/// Cached anomaly-detection results; not persisted.
#[serde(skip)]
pub anomaly_results: Option<DataFrame>,
/// Cached clustering results; not persisted.
#[serde(skip)]
pub clustering_results: Option<DataFrame>,
/// Cached descriptive-statistics results; not persisted.
#[serde(skip)]
pub descriptive_statistics_results: Option<DataFrame>,
/// Cached histogram results; not persisted.
#[serde(skip)]
pub histogram_results: Option<DataFrame>,
/// Cached null-analysis results; not persisted.
#[serde(skip)]
pub null_analysis_results: Option<DataFrame>,
/// Cached value-counts results; not persisted.
#[serde(skip)]
pub value_counts_results: Option<DataFrame>,
/// Cached filter-application results; not persisted.
#[serde(skip)]
pub filter_results: Option<DataFrame>,
/// Cached bar-chart data; not persisted.
#[serde(skip)]
pub bar_chart_results: Option<DataFrame>,
/// Cached correlation matrix; not persisted.
#[serde(skip)]
pub correlation_matrix_results: Option<DataFrame>,
}
impl Default for Workspace {
fn default() -> Self {
Self {
loaded_file_path: None,
cached_data_path: None,
active_columns: Vec::new(),
filters: HashMap::new(),
analysis_parameters: HashMap::new(),
column_datatypes: HashMap::new(),
ids_are_continuous: false,
analyzer: None,
ranked_data: None,
anomaly_results: None,
clustering_results: None,
descriptive_statistics_results: None,
histogram_results: None,
null_analysis_results: None,
value_counts_results: None,
filter_results: None,
bar_chart_results: None,
correlation_matrix_results: None,
}
}
}
impl Workspace {
pub fn new() -> Self {
Default::default()
}
pub fn set_loaded_file_path(&mut self, path: PathBuf) {
self.loaded_file_path = Some(path);
}
pub fn add_active_column(&mut self, column: String) {
if !self.active_columns.contains(&column) {
self.active_columns.push(column);
}
}
pub fn remove_active_column(&mut self, column: &str) {
self.active_columns.retain(|c| c != column);
}
pub fn add_filter(&mut self, column: String, expression: String) {
self.filters.insert(column, expression);
}
pub fn remove_filter(&mut self, column: &str) {
self.filters.remove(column);
}
pub fn set_analysis_parameter(&mut self, key: String, value: String) {
self.analysis_parameters.insert(key, value);
}
pub fn get_analysis_parameter(&self, key: &str) -> Option<&String> {
self.analysis_parameters.get(key)
}
pub async fn save_to_file(&mut self, path: &Path) -> Result<()> {
let cache_dir = path.parent().unwrap_or_else(|| Path::new("")).join(".cache");
fs::create_dir_all(&cache_dir).await?;
if let Some(analyzer) = &self.analyzer {
let df = analyzer.get_dataframe();
let cache_file_name = format!("{}.ipc", Uuid::new_v4());
let cache_file_path = cache_dir.join(cache_file_name);
let file = fs::File::create(&cache_file_path).await?;
IpcWriter::new(file.into_std().await)
.finish(&mut df.clone())
.map_err(|e| anyhow!("Failed to save DataFrame to IPC: {}", e))?;
self.cached_data_path = Some(cache_file_path);
}
let serialized = serde_json::to_string_pretty(self)?;
fs::write(path, serialized.as_bytes()).await?;
Ok(())
}
pub async fn load_from_file(path: &Path) -> Result<Self> {
let content = fs::read_to_string(path).await?;
let mut state: Workspace = serde_json::from_str(&content)?;
if let Some(cached_path) = &state.cached_data_path {
if cached_path.exists() {
let file = fs::File::open(cached_path).await?;
let df = IpcReader::new(file.into_std().await)
.finish()
.map_err(|e| anyhow!("Failed to load DataFrame from IPC: {}", e))?;
state.analyzer = Some(DataFrameAnalyzer::new(df));
}
}
Ok(state)
}
}