lk_inside/data_core/
workspace.rs1use serde::{Serialize, Deserialize};
2use std::path::{PathBuf, Path};
3use std::collections::HashMap;
4use anyhow::{Result, anyhow};
5use tokio::fs;
6use crate::data_core::analyzer::DataFrameAnalyzer;
7use polars::prelude::{DataFrame, SerReader, SerWriter};
8use polars::io::ipc::{IpcReader, IpcWriter};
9use uuid::Uuid;
10
11#[derive(Debug, Serialize, Deserialize)]
12pub struct Workspace {
13 pub loaded_file_path: Option<PathBuf>,
14 pub cached_data_path: Option<PathBuf>,
15 pub active_columns: Vec<String>,
16 pub filters: HashMap<String, String>,
17 pub analysis_parameters: HashMap<String, String>,
18 pub column_datatypes: HashMap<String, String>,
19 pub ids_are_continuous: bool,
20
21 #[serde(skip)]
22 pub analyzer: Option<DataFrameAnalyzer>,
23 #[serde(skip)]
24 pub ranked_data: Option<DataFrame>,
25 #[serde(skip)]
26 pub anomaly_results: Option<DataFrame>,
27 #[serde(skip)]
28 pub clustering_results: Option<DataFrame>,
29 #[serde(skip)]
30 pub descriptive_statistics_results: Option<DataFrame>,
31 #[serde(skip)]
32 pub histogram_results: Option<DataFrame>,
33 #[serde(skip)]
34 pub null_analysis_results: Option<DataFrame>,
35 #[serde(skip)]
36 pub value_counts_results: Option<DataFrame>,
37 #[serde(skip)]
38 pub filter_results: Option<DataFrame>,
39 #[serde(skip)]
40 pub bar_chart_results: Option<DataFrame>,
41 #[serde(skip)]
42 pub correlation_matrix_results: Option<DataFrame>,
43}
44
45impl Default for Workspace {
46 fn default() -> Self {
47 Self {
48 loaded_file_path: None,
49 cached_data_path: None,
50 active_columns: Vec::new(),
51 filters: HashMap::new(),
52 analysis_parameters: HashMap::new(),
53 column_datatypes: HashMap::new(),
54 ids_are_continuous: false,
55 analyzer: None,
56 ranked_data: None,
57 anomaly_results: None,
58 clustering_results: None,
59 descriptive_statistics_results: None,
60 histogram_results: None,
61 null_analysis_results: None,
62 value_counts_results: None,
63 filter_results: None,
64 bar_chart_results: None,
65 correlation_matrix_results: None,
66 }
67 }
68}
69
70impl Workspace {
71 pub fn new() -> Self {
72 Default::default()
73 }
74
75 pub fn set_loaded_file_path(&mut self, path: PathBuf) {
76 self.loaded_file_path = Some(path);
77 }
78
79 pub fn add_active_column(&mut self, column: String) {
80 if !self.active_columns.contains(&column) {
81 self.active_columns.push(column);
82 }
83 }
84
85 pub fn remove_active_column(&mut self, column: &str) {
86 self.active_columns.retain(|c| c != column);
87 }
88
89 pub fn add_filter(&mut self, column: String, expression: String) {
90 self.filters.insert(column, expression);
91 }
92
93 pub fn remove_filter(&mut self, column: &str) {
94 self.filters.remove(column);
95 }
96
97 pub fn set_analysis_parameter(&mut self, key: String, value: String) {
98 self.analysis_parameters.insert(key, value);
99 }
100
101 pub fn get_analysis_parameter(&self, key: &str) -> Option<&String> {
102 self.analysis_parameters.get(key)
103 }
104
105 pub async fn save_to_file(&mut self, path: &Path) -> Result<()> {
106 let cache_dir = path.parent().unwrap_or_else(|| Path::new("")).join(".cache");
107 fs::create_dir_all(&cache_dir).await?;
108
109 if let Some(analyzer) = &self.analyzer {
110 let df = analyzer.get_dataframe();
111 let cache_file_name = format!("{}.ipc", Uuid::new_v4());
112 let cache_file_path = cache_dir.join(cache_file_name);
113
114 let file = fs::File::create(&cache_file_path).await?;
115 IpcWriter::new(file.into_std().await)
116 .finish(&mut df.clone())
117 .map_err(|e| anyhow!("Failed to save DataFrame to IPC: {}", e))?;
118
119 self.cached_data_path = Some(cache_file_path);
120 }
121
122 let serialized = serde_json::to_string_pretty(self)?;
123 fs::write(path, serialized.as_bytes()).await?;
124 Ok(())
125 }
126
127 pub async fn load_from_file(path: &Path) -> Result<Self> {
128 let content = fs::read_to_string(path).await?;
129 let mut state: Workspace = serde_json::from_str(&content)?;
130
131 if let Some(cached_path) = &state.cached_data_path {
132 if cached_path.exists() {
133 let file = fs::File::open(cached_path).await?;
134 let df = IpcReader::new(file.into_std().await)
135 .finish()
136 .map_err(|e| anyhow!("Failed to load DataFrame from IPC: {}", e))?;
137 state.analyzer = Some(DataFrameAnalyzer::new(df));
138 }
139 }
140 Ok(state)
141 }
142}