Skip to main content

plot_redactor/model/
data.rs

1//! Data ingestion and columnar storage layer.
2//!
3//! This module is intentionally backend-agnostic:
4//! - `DataSource` is a trait for host applications to provide numeric columns.
5//! - `DataTable` is an internal normalized representation used by the editor.
6//! - CSV/TXT parsing utilities build `DataTable` from files.
7
8use std::fs;
9use std::path::Path;
10
/// One numeric column with a stable display name.
#[derive(Clone, Debug)]
pub struct ColumnData {
    /// Column name used for lookups and display. The parsers in this file
    /// fill it from the header row or generate `col_N` when there is none.
    pub name: String,
    /// Column values, one entry per table row.
    pub values: Vec<f64>,
}
17
/// In-memory normalized table used by plotting logic.
#[derive(Clone, Debug)]
pub struct DataTable {
    /// Columns in display order.
    pub columns: Vec<ColumnData>,
    /// Number of rows. The constructors in this file make every column
    /// exactly this long; the fields are public, so code that mutates them
    /// directly is responsible for keeping them in sync.
    pub row_count: usize,
}
24
25impl DataTable {
26    /// Creates an empty table.
27    pub fn empty() -> Self {
28        Self {
29            columns: Vec::new(),
30            row_count: 0,
31        }
32    }
33
34    /// Parses a CSV file into a numeric table.
35    pub fn from_csv_path(path: &Path) -> Result<Self, String> {
36        let text = fs::read_to_string(path)
37            .map_err(|e| format!("Failed to read CSV: {e} / Ne udalos prochitat CSV"))?;
38        parse_delimited(&text, Delimiter::Comma)
39    }
40
41    /// Parses a whitespace-separated TXT file into a numeric table.
42    pub fn from_txt_path(path: &Path) -> Result<Self, String> {
43        let text = fs::read_to_string(path)
44            .map_err(|e| format!("Failed to read TXT: {e} / Ne udalos prochitat TXT"))?;
45        parse_delimited(&text, Delimiter::Whitespace)
46    }
47
48    /// Returns column names in display order.
49    pub fn column_names(&self) -> Vec<String> {
50        self.columns.iter().map(|c| c.name.clone()).collect()
51    }
52
53    /// Checks whether a column with this name exists.
54    pub fn has_column(&self, name: &str) -> bool {
55        self.columns.iter().any(|c| c.name == name)
56    }
57
58    /// Returns immutable numeric values of a named column.
59    pub fn column_values(&self, name: &str) -> Option<&[f64]> {
60        self.columns
61            .iter()
62            .find(|c| c.name == name)
63            .map(|c| c.values.as_slice())
64    }
65
66    /// Builds `(x, y)` points from two selected columns.
67    pub fn points_for_columns(&self, x: &str, y: &str) -> Result<Vec<(f32, f32)>, String> {
68        let x_values = self
69            .column_values(x)
70            .ok_or_else(|| format!("X column not found: {x}"))?;
71        let y_values = self
72            .column_values(y)
73            .ok_or_else(|| format!("Y column not found: {y}"))?;
74
75        let len = x_values.len().min(y_values.len());
76        Ok((0..len)
77            .map(|i| (x_values[i] as f32, y_values[i] as f32))
78            .collect())
79    }
80
81    /// Normalizes any `DataSource` implementation into a `DataTable`.
82    pub fn from_data_source(source: &dyn DataSource) -> Result<Self, String> {
83        let names = source.column_names();
84        if names.is_empty() {
85            return Err("Data source has no columns / Istochnik ne soderzhit stolbcov".to_owned());
86        }
87
88        let mut columns = Vec::with_capacity(names.len());
89        let mut row_count = source.len();
90        for name in names {
91            let values = source
92                .column(&name)
93                .ok_or_else(|| format!("Missing column in source: {name}"))?;
94            row_count = row_count.min(values.len());
95            columns.push(ColumnData { name, values });
96        }
97
98        for col in &mut columns {
99            col.values.truncate(row_count);
100        }
101
102        Ok(Self { columns, row_count })
103    }
104}
105
/// Field separator used when splitting a text line into tokens.
#[derive(Clone, Copy)]
enum Delimiter {
    /// Fields are separated by `,`.
    Comma,
    /// Fields are separated by runs of whitespace.
    Whitespace,
}
111
112fn parse_delimited(text: &str, delimiter: Delimiter) -> Result<DataTable, String> {
113    let mut lines = text
114        .lines()
115        .map(str::trim)
116        .filter(|line| !line.is_empty() && !line.starts_with('#'));
117
118    let first = lines
119        .next()
120        .ok_or_else(|| "Input file is empty / Fail pust".to_owned())?;
121
122    let first_tokens = split_tokens(first, delimiter);
123    if first_tokens.is_empty() {
124        return Err("First row has no values / V pervoy stroke net znacheniy".to_owned());
125    }
126
127    let first_is_header = first_tokens.iter().any(|t| t.parse::<f64>().is_err());
128    let headers: Vec<String>;
129    let mut rows: Vec<Vec<f64>> = Vec::new();
130
131    if first_is_header {
132        headers = first_tokens;
133    } else {
134        headers = (1..=first_tokens.len())
135            .map(|i| format!("col_{i}"))
136            .collect();
137        rows.push(parse_numeric_row(&first_tokens)?);
138    }
139
140    for (line_no, line) in lines.enumerate() {
141        let tokens = split_tokens(line, delimiter);
142        if tokens.len() != headers.len() {
143            return Err(format!(
144                "Row {} has {} values, expected {}",
145                line_no + 2,
146                tokens.len(),
147                headers.len()
148            ));
149        }
150        rows.push(parse_numeric_row(&tokens)?);
151    }
152
153    if rows.is_empty() {
154        return Err("No numeric rows found / Net chislovyh strok".to_owned());
155    }
156
157    let row_count = rows.len();
158    let mut columns = Vec::with_capacity(headers.len());
159    for (idx, name) in headers.into_iter().enumerate() {
160        let mut values = Vec::with_capacity(row_count);
161        for row in &rows {
162            values.push(row[idx]);
163        }
164        columns.push(ColumnData { name, values });
165    }
166
167    Ok(DataTable { columns, row_count })
168}
169
170fn split_tokens(line: &str, delimiter: Delimiter) -> Vec<String> {
171    match delimiter {
172        Delimiter::Comma => line
173            .split(',')
174            .map(|s| s.trim().to_owned())
175            .filter(|s| !s.is_empty())
176            .collect(),
177        Delimiter::Whitespace => line
178            .split_whitespace()
179            .map(|s| s.trim().to_owned())
180            .filter(|s| !s.is_empty())
181            .collect(),
182    }
183}
184
/// Converts every token to `f64`, preserving order.
///
/// # Errors
/// Stops at the first token that does not parse and returns
/// `"Failed to parse number: <token>"`.
fn parse_numeric_row(tokens: &[String]) -> Result<Vec<f64>, String> {
    tokens
        .iter()
        .map(|raw| {
            raw.parse::<f64>()
                .map_err(|_| format!("Failed to parse number: {raw}"))
        })
        .collect()
}
195
/// Generic data-provider contract for embedding this crate into host apps.
///
/// `DataTable::from_data_source` calls `column_names`, then `len`, then
/// `column` once per listed name, and copies the result into a `DataTable`.
pub trait DataSource: Send + Sync {
    /// Returns an owned copy of the values of column `name`, or `None` when
    /// the source has no such column.
    fn column(&self, name: &str) -> Option<Vec<f64>>;
    /// Returns the names of the columns this source provides.
    fn column_names(&self) -> Vec<String>;
    /// Returns the number of rows the source reports;
    /// `DataTable::from_data_source` treats it as an upper bound.
    fn len(&self) -> usize;
}
202
203impl DataSource for DataTable {
204    fn column(&self, name: &str) -> Option<Vec<f64>> {
205        self.column_values(name).map(ToOwned::to_owned)
206    }
207
208    fn column_names(&self) -> Vec<String> {
209        self.column_names()
210    }
211
212    fn len(&self) -> usize {
213        self.row_count
214    }
215}