lk_inside/data_core/
workspace.rs

1use serde::{Serialize, Deserialize};
2use std::path::{PathBuf, Path};
3use std::collections::HashMap;
4use anyhow::{Result, anyhow};
5use tokio::fs;
6use crate::data_core::analyzer::DataFrameAnalyzer;
7use polars::prelude::{DataFrame, SerReader, SerWriter};
8use polars::io::ipc::{IpcReader, IpcWriter};
9use uuid::Uuid;
10
11#[derive(Debug, Serialize, Deserialize)]
12pub struct Workspace {
13    pub loaded_file_path: Option<PathBuf>,
14    pub cached_data_path: Option<PathBuf>,
15    pub active_columns: Vec<String>,
16    pub filters: HashMap<String, String>,
17    pub analysis_parameters: HashMap<String, String>,
18    pub column_datatypes: HashMap<String, String>,
19    pub ids_are_continuous: bool,
20
21    #[serde(skip)]
22    pub analyzer: Option<DataFrameAnalyzer>,
23    #[serde(skip)]
24    pub ranked_data: Option<DataFrame>,
25    #[serde(skip)]
26    pub anomaly_results: Option<DataFrame>,
27    #[serde(skip)]
28    pub clustering_results: Option<DataFrame>,
29    #[serde(skip)]
30    pub descriptive_statistics_results: Option<DataFrame>,
31    #[serde(skip)]
32    pub histogram_results: Option<DataFrame>,
33    #[serde(skip)]
34    pub null_analysis_results: Option<DataFrame>,
35    #[serde(skip)]
36    pub value_counts_results: Option<DataFrame>,
37    #[serde(skip)]
38    pub filter_results: Option<DataFrame>,
39    #[serde(skip)]
40    pub bar_chart_results: Option<DataFrame>,
41    #[serde(skip)]
42    pub correlation_matrix_results: Option<DataFrame>,
43}
44
45impl Default for Workspace {
46    fn default() -> Self {
47        Self {
48            loaded_file_path: None,
49            cached_data_path: None,
50            active_columns: Vec::new(),
51            filters: HashMap::new(),
52            analysis_parameters: HashMap::new(),
53            column_datatypes: HashMap::new(),
54            ids_are_continuous: false,
55            analyzer: None,
56            ranked_data: None,
57            anomaly_results: None,
58            clustering_results: None,
59            descriptive_statistics_results: None,
60            histogram_results: None,
61            null_analysis_results: None,
62            value_counts_results: None,
63            filter_results: None,
64            bar_chart_results: None,
65            correlation_matrix_results: None,
66        }
67    }
68}
69
70impl Workspace {
71    pub fn new() -> Self {
72        Default::default()
73    }
74
75    pub fn set_loaded_file_path(&mut self, path: PathBuf) {
76        self.loaded_file_path = Some(path);
77    }
78
79    pub fn add_active_column(&mut self, column: String) {
80        if !self.active_columns.contains(&column) {
81            self.active_columns.push(column);
82        }
83    }
84
85    pub fn remove_active_column(&mut self, column: &str) {
86        self.active_columns.retain(|c| c != column);
87    }
88
89    pub fn add_filter(&mut self, column: String, expression: String) {
90        self.filters.insert(column, expression);
91    }
92
93    pub fn remove_filter(&mut self, column: &str) {
94        self.filters.remove(column);
95    }
96
97    pub fn set_analysis_parameter(&mut self, key: String, value: String) {
98        self.analysis_parameters.insert(key, value);
99    }
100
101    pub fn get_analysis_parameter(&self, key: &str) -> Option<&String> {
102    self.analysis_parameters.get(key)
103    }
104
105    pub async fn save_to_file(&mut self, path: &Path) -> Result<()> {
106        let cache_dir = path.parent().unwrap_or_else(|| Path::new("")).join(".cache");
107        fs::create_dir_all(&cache_dir).await?;
108
109        if let Some(analyzer) = &self.analyzer {
110            let df = analyzer.get_dataframe();
111            let cache_file_name = format!("{}.ipc", Uuid::new_v4());
112            let cache_file_path = cache_dir.join(cache_file_name);
113
114            let file = fs::File::create(&cache_file_path).await?;
115            IpcWriter::new(file.into_std().await)
116                .finish(&mut df.clone())
117                .map_err(|e| anyhow!("Failed to save DataFrame to IPC: {}", e))?;
118            
119            self.cached_data_path = Some(cache_file_path);
120        }
121
122        let serialized = serde_json::to_string_pretty(self)?;
123        fs::write(path, serialized.as_bytes()).await?;
124        Ok(())
125    }
126
127    pub async fn load_from_file(path: &Path) -> Result<Self> {
128        let content = fs::read_to_string(path).await?;
129        let mut state: Workspace = serde_json::from_str(&content)?;
130
131        if let Some(cached_path) = &state.cached_data_path {
132            if cached_path.exists() {
133                let file = fs::File::open(cached_path).await?;
134                let df = IpcReader::new(file.into_std().await)
135                    .finish()
136                    .map_err(|e| anyhow!("Failed to load DataFrame from IPC: {}", e))?;
137                state.analyzer = Some(DataFrameAnalyzer::new(df));
138            }
139        }
140        Ok(state)
141    }
142}