Skip to main content

gitsheets/core/
mod.rs

1// git-sheets: Core module - fundamental data structures and operations
2// A tool for Excel sufferers who deserve better
3
4use chrono::{DateTime, Utc};
5use serde::{Deserialize, Serialize};
6use sha2::{Digest, Sha256};
7use std::collections::HashMap;
8use std::fs;
9use std::io::Write;
10use std::path::{Path, PathBuf};
11
12pub mod errors;
13pub use errors::{GitSheetsError, Result};
14
15// ============================================================================
16// CORE PRIMITIVES
17// ============================================================================
18
19/// A snapshot represents the complete state of a table at a point in time
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct Snapshot {
22    /// Unique identifier for this snapshot
23    pub id: String,
24    /// When this snapshot was taken
25    pub timestamp: DateTime<Utc>,
26    /// User-provided message explaining the snapshot
27    pub message: Option<String>,
28    /// The table data
29    pub table: Table,
30    /// Hashes for integrity verification
31    pub hashes: TableHashes,
32    /// Dependencies on other tables/files
33    pub dependencies: Vec<Dependency>,
34}
35
36/// A table is just headers + rows, nothing fancy
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct Table {
39    /// Column names (the primary key lives here)
40    pub headers: Vec<String>,
41    /// Raw row data
42    pub rows: Vec<Vec<String>>,
43    /// Optional: which column(s) form the primary key
44    pub primary_key: Option<Vec<usize>>,
45}
46
47/// Hashes for verifying table integrity
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct TableHashes {
50    /// Hash of the entire table (quick integrity check)
51    pub table_hash: String,
52    /// Per-header hashes (column-level verification)
53    pub header_hashes: HashMap<String, String>,
54    /// Optional: per-row hashes (fine-grained verification)
55    pub row_hashes: Option<Vec<String>>,
56}
57
58impl TableHashes {
59    /// Compute hashes for a table
60    pub fn compute(table: &Table) -> Self {
61        let mut hasher = Sha256::new();
62
63        // Hash the entire table by concatenating all data
64        for header in &table.headers {
65            hasher.update(header.as_bytes());
66        }
67        for row in &table.rows {
68            for cell in row {
69                hasher.update(cell.as_bytes());
70            }
71        }
72
73        let table_hash = format!("{:x}", hasher.finalize());
74
75        // Compute per-header hashes
76        let mut header_hashes = HashMap::new();
77        for (idx, header) in table.headers.iter().enumerate() {
78            let mut hasher = Sha256::new();
79            hasher.update(header.as_bytes());
80
81            // Hash all values in this column
82            for row in &table.rows {
83                if idx < row.len() {
84                    hasher.update(row[idx].as_bytes());
85                }
86            }
87
88            header_hashes.insert(header.clone(), format!("{:x}", hasher.finalize()));
89        }
90
91        Self {
92            table_hash,
93            header_hashes,
94            row_hashes: None,
95        }
96    }
97}
98
99/// A dependency represents a reference to another table or file
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct Dependency {
102    /// Name or identifier of the dependency
103    pub name: String,
104    /// File path if it's external
105    pub path: Option<PathBuf>,
106    /// Hash of the dependency at snapshot time
107    pub hash: String,
108}
109
110// ============================================================================
111// SNAPSHOT OPERATIONS
112// ============================================================================
113
114impl Snapshot {
115    /// Create a new snapshot from a table
116    pub fn new(table: Table, message: Option<String>) -> Self {
117        let hashes = TableHashes::compute(&table);
118        let id = format!("{}-{}", Utc::now().timestamp(), &hashes.table_hash[..8]);
119
120        Self {
121            id,
122            timestamp: Utc::now(),
123            message,
124            table,
125            hashes,
126            dependencies: Vec::new(),
127        }
128    }
129
130    /// Add a dependency to this snapshot
131    pub fn add_dependency(&mut self, name: String, path: Option<PathBuf>, hash: String) {
132        self.dependencies.push(Dependency { name, path, hash });
133    }
134
135    /// Save snapshot to disk as TOML
136    pub fn save(&self, path: &Path) -> Result<()> {
137        let toml_string = toml::to_string_pretty(self)?;
138        fs::write(path, toml_string)?;
139        Ok(())
140    }
141
142    /// Load snapshot from disk
143    pub fn load(path: &Path) -> Result<Snapshot> {
144        let content = fs::read_to_string(path)?;
145        let snapshot: Snapshot = toml::from_str(&content)?;
146        Ok(snapshot)
147    }
148
149    /// Verify integrity of this snapshot
150    pub fn verify(&self) -> bool {
151        let computed = TableHashes::compute(&self.table);
152        computed.table_hash == self.hashes.table_hash
153    }
154
155    /// Verify dependencies of this snapshot
156    pub fn verify_dependencies(&self) -> Result<()> {
157        for dep in &self.dependencies {
158            if let Some(dep_path) = &dep.path {
159                let content = fs::read_to_string(dep_path)?;
160                let computed_hash = Self::compute_hash(&content);
161                if computed_hash != dep.hash {
162                    return Err(GitSheetsError::DependencyHashMismatch(format!(
163                        "Dependency '{}' hash mismatch",
164                        dep.name
165                    )));
166                }
167            }
168        }
169        Ok(())
170    }
171
172    /// Compute hash for string content
173    fn compute_hash(content: &str) -> String {
174        let mut hasher = Sha256::new();
175        hasher.update(content.as_bytes());
176        format!("{:x}", hasher.finalize())
177    }
178}
179
180// ============================================================================
181// TABLE OPERATIONS
182// ============================================================================
183
184impl Table {
185    /// Create a table from CSV data
186    pub fn from_csv(path: &Path) -> Result<Self> {
187        let mut reader = csv::Reader::from_path(path)?;
188
189        // Get headers
190        let headers: Vec<String> = reader
191            .headers()?
192            .iter()
193            .map(|h| h.trim().to_string())
194            .collect();
195
196        // Get rows
197        let mut rows = Vec::new();
198        for result in reader.records() {
199            let record = result?;
200            let row: Vec<String> = record.iter().map(|cell| cell.trim().to_string()).collect();
201            rows.push(row);
202        }
203
204        if rows.is_empty() {
205            return Err(GitSheetsError::EmptyTable);
206        }
207
208        Ok(Self {
209            headers,
210            rows,
211            primary_key: None,
212        })
213    }
214
215    /// Set which columns form the primary key
216    pub fn set_primary_key(&mut self, column_indices: Vec<usize>) {
217        self.primary_key = Some(column_indices);
218    }
219
220    /// Get the primary key for a specific row
221    pub fn get_row_key(&self, row_idx: usize) -> Result<Vec<String>> {
222        let pk_indices = self
223            .primary_key
224            .as_ref()
225            .ok_or_else(|| GitSheetsError::NoPrimaryKey)?;
226
227        let row = self.rows.get(row_idx).ok_or_else(|| {
228            GitSheetsError::InvalidRowIndex(format!(
229                "Row index {} exceeds row count {}",
230                row_idx,
231                self.rows.len()
232            ))
233        })?;
234
235        let pk_values: Vec<String> = pk_indices
236            .iter()
237            .filter_map(|&idx| row.get(idx).cloned())
238            .collect();
239
240        if pk_values.is_empty() {
241            return Err(GitSheetsError::NoPrimaryKey);
242        }
243
244        Ok(pk_values)
245    }
246}
247
248// ============================================================================
249// REPO OPERATIONS
250// ============================================================================
251
252/// A git-sheets repository
253pub struct GitSheetsRepo {
254    /// Path to the repository
255    pub path: PathBuf,
256    /// Git repository handle (optional)
257    pub git_repo: Option<git2::Repository>,
258}
259
260impl GitSheetsRepo {
261    /// Initialize a new git-sheets repository
262    pub fn init(path: PathBuf) -> Result<GitSheetsRepo> {
263        let repo_path = path.canonicalize()?;
264
265        // Create directory structure
266        std::fs::create_dir_all(repo_path.join("snapshots"))?;
267        std::fs::create_dir_all(repo_path.join("diffs"))?;
268
269        // Create .gitignore if needed
270        let gitignore_path = repo_path.join(".gitignore");
271        if !gitignore_path.exists() {
272            let mut gitignore = std::fs::File::create(gitignore_path)?;
273            writeln!(gitignore, "snapshots/")?;
274            writeln!(gitignore, "diffs/")?;
275            writeln!(gitignore, "*.toml")?;
276            writeln!(gitignore, "*.json")?;
277        }
278
279        Ok(GitSheetsRepo {
280            path: repo_path,
281            git_repo: None,
282        })
283    }
284
285    /// Open an existing git-sheets repository
286    pub fn open(path: &str) -> Result<GitSheetsRepo> {
287        let repo_path = PathBuf::from(path).canonicalize()?;
288
289        if !repo_path.join("snapshots").exists() {
290            return Err(GitSheetsError::FileSystemError(
291                "Not a git-sheets repository".to_string(),
292            ));
293        }
294
295        Ok(GitSheetsRepo {
296            path: repo_path,
297            git_repo: None,
298        })
299    }
300
301    /// Commit a snapshot to git
302    pub fn commit_snapshot(&self) -> Result<()> {
303        // This is a placeholder implementation
304        // In a real implementation, this would integrate with git
305        Ok(())
306    }
307
308    /// Check if there are uncommitted changes
309    pub fn has_changes(&self) -> bool {
310        // Placeholder implementation
311        false
312    }
313}