Skip to main content

formualizer_workbook/backends/
json.rs

1use crate::IoError;
2use crate::traits::{
3    AccessGranularity, BackendCaps, CellData, MergedRange, NamedRange, SaveDestination, SheetData,
4    SpreadsheetReader, SpreadsheetWriter, TableDefinition,
5};
6use serde::{Deserialize, Serialize};
7use std::collections::BTreeMap;
8use std::fs::File;
9use std::io::{BufReader, Read, Write};
10use std::path::{Path, PathBuf};
11
12#[derive(Serialize, Deserialize, Debug, Default, Clone)]
13struct JsonWorkbook {
14    #[serde(default = "default_version")]
15    version: u32,
16    #[serde(default)]
17    compression: Option<CompressionType>,
18    #[serde(default)]
19    sources: Vec<JsonSource>,
20    #[serde(default)]
21    sheets: BTreeMap<String, JsonSheet>,
22}
23
24fn default_version() -> u32 {
25    1
26}
27
28#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
29#[serde(rename_all = "lowercase")]
30#[derive(Default)]
31enum CompressionType {
32    #[default]
33    None,
34    Lz4,
35}
36
37#[derive(Serialize, Deserialize, Debug, Default, Clone)]
38struct JsonSheet {
39    #[serde(default)]
40    cells: Vec<JsonCell>,
41    #[serde(default)]
42    dimensions: Option<(u32, u32)>,
43    #[serde(default)]
44    hidden: bool,
45    #[serde(default)]
46    date_system_1904: bool,
47    #[serde(default)]
48    merged_cells: Vec<MergedRange>,
49    #[serde(default)]
50    tables: Vec<TableDefinition>,
51    #[serde(default)]
52    named_ranges: Vec<NamedRange>,
53}
54
55#[derive(Serialize, Deserialize, Debug, Clone)]
56struct JsonCell {
57    row: u32,
58    col: u32,
59    #[serde(default)]
60    value: Option<JsonValue>,
61    #[serde(default)]
62    formula: Option<String>,
63    #[serde(default)]
64    style: Option<u32>,
65}
66
67#[derive(Serialize, Deserialize, Debug, Clone)]
68#[serde(tag = "type", rename_all = "lowercase")]
69enum JsonSource {
70    Scalar { name: String, version: Option<u64> },
71    Table { name: String, version: Option<u64> },
72}
73
74#[derive(Serialize, Deserialize, Debug, Clone)]
75#[serde(tag = "type", content = "value")]
76enum JsonValue {
77    Int(i64),
78    Number(f64),
79    Text(String),
80    Boolean(bool),
81    Empty,
82    Date(String),
83    DateTime(String),
84    Time(String),
85    Duration(i64),
86    Array(Vec<Vec<JsonValue>>),
87    Error(String),
88    Pending,
89}
90
91pub struct JsonAdapter {
92    data: JsonWorkbook,
93    path: Option<PathBuf>,
94    caps: BackendCaps,
95}
96
97impl Default for JsonAdapter {
98    fn default() -> Self {
99        Self::new()
100    }
101}
102
103impl JsonAdapter {
104    pub fn new() -> Self {
105        Self {
106            data: JsonWorkbook::default(),
107            path: None,
108            caps: BackendCaps {
109                read: true,
110                write: true,
111                streaming: false,
112                tables: true,
113                named_ranges: true,
114                formulas: true,
115                styles: true,
116                lazy_loading: false,
117                random_access: true,
118                bytes_input: true,
119                date_system_1904: false,
120                merged_cells: true,
121                rich_text: false,
122                hyperlinks: false,
123                data_validations: false,
124                shared_formulas: false,
125            },
126        }
127    }
128
129    fn to_sheet_data(js: &JsonSheet) -> SheetData {
130        let mut cells: BTreeMap<(u32, u32), CellData> = BTreeMap::new();
131        for c in &js.cells {
132            let lit = c.value.as_ref().map(json_to_literal);
133            cells.insert(
134                (c.row, c.col),
135                CellData {
136                    value: lit,
137                    formula: c.formula.clone(),
138                    style: c.style,
139                },
140            );
141        }
142        SheetData {
143            cells,
144            dimensions: js.dimensions,
145            tables: js.tables.clone(),
146            named_ranges: js.named_ranges.clone(),
147            date_system_1904: js.date_system_1904,
148            merged_cells: js.merged_cells.clone(),
149            hidden: js.hidden,
150        }
151    }
152
153    pub fn to_json_string(&self) -> Result<String, IoError> {
154        Ok(serde_json::to_string_pretty(&self.data)?)
155    }
156
157    // Backend-specific helpers (not part of SpreadsheetWriter)
158    fn ensure_sheet_mut(&mut self, name: &str) -> &mut JsonSheet {
159        self.data.sheets.entry(name.to_string()).or_default()
160    }
161
162    pub fn set_dimensions(&mut self, sheet: &str, dims: Option<(u32, u32)>) {
163        self.ensure_sheet_mut(sheet).dimensions = dims;
164    }
165
166    pub fn set_date_system_1904(&mut self, sheet: &str, value: bool) {
167        self.ensure_sheet_mut(sheet).date_system_1904 = value;
168    }
169
170    pub fn set_merged_cells(&mut self, sheet: &str, merged: Vec<MergedRange>) {
171        self.ensure_sheet_mut(sheet).merged_cells = merged;
172    }
173
174    pub fn set_tables(&mut self, sheet: &str, tables: Vec<TableDefinition>) {
175        self.ensure_sheet_mut(sheet).tables = tables;
176    }
177
178    pub fn set_named_ranges(&mut self, sheet: &str, named: Vec<NamedRange>) {
179        self.ensure_sheet_mut(sheet).named_ranges = named;
180    }
181}
182
183impl SpreadsheetReader for JsonAdapter {
184    type Error = IoError;
185
186    fn access_granularity(&self) -> AccessGranularity {
187        AccessGranularity::Workbook
188    }
189
190    fn capabilities(&self) -> BackendCaps {
191        self.caps.clone()
192    }
193
194    fn sheet_names(&self) -> Result<Vec<String>, Self::Error> {
195        Ok(self.data.sheets.keys().cloned().collect())
196    }
197
198    fn open_path<P: AsRef<Path>>(path: P) -> Result<Self, Self::Error>
199    where
200        Self: Sized,
201    {
202        let file = File::open(path.as_ref())?;
203        let reader = BufReader::new(file);
204        let data: JsonWorkbook = serde_json::from_reader(reader)?;
205        Ok(JsonAdapter {
206            data,
207            path: Some(path.as_ref().to_path_buf()),
208            ..JsonAdapter::new()
209        })
210    }
211
212    fn open_reader(reader: Box<dyn Read + Send + Sync>) -> Result<Self, Self::Error>
213    where
214        Self: Sized,
215    {
216        let data: JsonWorkbook = serde_json::from_reader(reader)?;
217        Ok(JsonAdapter {
218            data,
219            ..JsonAdapter::new()
220        })
221    }
222
223    fn open_bytes(bytes: Vec<u8>) -> Result<Self, Self::Error>
224    where
225        Self: Sized,
226    {
227        let data: JsonWorkbook = serde_json::from_slice(&bytes)?;
228        Ok(JsonAdapter {
229            data,
230            ..JsonAdapter::new()
231        })
232    }
233
234    fn read_range(
235        &mut self,
236        sheet: &str,
237        start: (u32, u32),
238        end: (u32, u32),
239    ) -> Result<BTreeMap<(u32, u32), CellData>, Self::Error> {
240        if let Some(js) = self.data.sheets.get(sheet) {
241            let mut out = BTreeMap::new();
242            for c in &js.cells {
243                if c.row >= start.0 && c.row <= end.0 && c.col >= start.1 && c.col <= end.1 {
244                    let lit = c.value.as_ref().map(json_to_literal);
245                    out.insert(
246                        (c.row, c.col),
247                        CellData {
248                            value: lit,
249                            formula: c.formula.clone(),
250                            style: c.style,
251                        },
252                    );
253                }
254            }
255            Ok(out)
256        } else {
257            Ok(BTreeMap::new())
258        }
259    }
260
261    fn read_sheet(&mut self, sheet: &str) -> Result<SheetData, Self::Error> {
262        if let Some(js) = self.data.sheets.get(sheet) {
263            Ok(Self::to_sheet_data(js))
264        } else {
265            Ok(SheetData {
266                cells: BTreeMap::new(),
267                dimensions: None,
268                tables: vec![],
269                named_ranges: vec![],
270                date_system_1904: false,
271                merged_cells: vec![],
272                hidden: false,
273            })
274        }
275    }
276
277    fn sheet_bounds(&self, sheet: &str) -> Option<(u32, u32)> {
278        self.data.sheets.get(sheet).and_then(|s| s.dimensions)
279    }
280
281    fn is_loaded(&self, _sheet: &str, _row: Option<u32>, _col: Option<u32>) -> bool {
282        true
283    }
284}
285
286impl SpreadsheetWriter for JsonAdapter {
287    type Error = IoError;
288
289    fn write_cell(
290        &mut self,
291        sheet: &str,
292        row: u32,
293        col: u32,
294        data: CellData,
295    ) -> Result<(), Self::Error> {
296        let sheet_entry = self.data.sheets.entry(sheet.to_string()).or_default();
297        if let Some(cell) = sheet_entry
298            .cells
299            .iter_mut()
300            .find(|c| c.row == row && c.col == col)
301        {
302            cell.value = data.value.as_ref().map(literal_to_json);
303            cell.formula = data.formula;
304            cell.style = data.style;
305        } else {
306            sheet_entry.cells.push(JsonCell {
307                row,
308                col,
309                value: data.value.as_ref().map(literal_to_json),
310                formula: data.formula,
311                style: data.style,
312            });
313        }
314        Ok(())
315    }
316
317    fn write_range(
318        &mut self,
319        sheet: &str,
320        cells: BTreeMap<(u32, u32), CellData>,
321    ) -> Result<(), Self::Error> {
322        for ((r, c), d) in cells {
323            self.write_cell(sheet, r, c, d)?;
324        }
325        Ok(())
326    }
327
328    fn clear_range(
329        &mut self,
330        sheet: &str,
331        start: (u32, u32),
332        end: (u32, u32),
333    ) -> Result<(), Self::Error> {
334        if let Some(js) = self.data.sheets.get_mut(sheet) {
335            js.cells.retain(|c| {
336                !(c.row >= start.0 && c.row <= end.0 && c.col >= start.1 && c.col <= end.1)
337            });
338        }
339        Ok(())
340    }
341
342    fn create_sheet(&mut self, name: &str) -> Result<(), Self::Error> {
343        self.data.sheets.entry(name.to_string()).or_default();
344        Ok(())
345    }
346
347    fn delete_sheet(&mut self, name: &str) -> Result<(), Self::Error> {
348        self.data.sheets.remove(name);
349        Ok(())
350    }
351
352    fn rename_sheet(&mut self, old: &str, new: &str) -> Result<(), Self::Error> {
353        if let Some(sheet) = self.data.sheets.remove(old) {
354            self.data.sheets.insert(new.to_string(), sheet);
355        }
356        Ok(())
357    }
358
359    fn flush(&mut self) -> Result<(), Self::Error> {
360        Ok(())
361    }
362
363    fn save(&mut self) -> Result<(), Self::Error> {
364        if let Some(path) = &self.path {
365            let mut file = File::create(path)?;
366            let s = serde_json::to_string_pretty(&self.data)?;
367            file.write_all(s.as_bytes())?;
368            Ok(())
369        } else {
370            Ok(())
371        }
372    }
373
374    fn save_to<'a>(&mut self, dest: SaveDestination<'a>) -> Result<Option<Vec<u8>>, Self::Error> {
375        match dest {
376            SaveDestination::InPlace => self.save().map(|_| None),
377            SaveDestination::Path(path) => {
378                let mut file = File::create(path)?;
379                let s = serde_json::to_string_pretty(&self.data)?;
380                file.write_all(s.as_bytes())?;
381                self.path = Some(path.to_path_buf());
382                Ok(None)
383            }
384            SaveDestination::Writer(writer) => {
385                let s = serde_json::to_string_pretty(&self.data)?;
386                writer.write_all(s.as_bytes())?;
387                Ok(None)
388            }
389            SaveDestination::Bytes => {
390                let s = serde_json::to_vec_pretty(&self.data)?;
391                Ok(Some(s))
392            }
393        }
394    }
395}
396
397fn literal_to_json(v: &formualizer_common::LiteralValue) -> JsonValue {
398    use formualizer_common::LiteralValue as L;
399    match v {
400        L::Int(i) => JsonValue::Int(*i),
401        L::Number(n) => JsonValue::Number(*n),
402        L::Text(s) => JsonValue::Text(s.clone()),
403        L::Boolean(b) => JsonValue::Boolean(*b),
404        L::Empty => JsonValue::Empty,
405        L::Array(arr) => JsonValue::Array(
406            arr.iter()
407                .map(|row| row.iter().map(literal_to_json).collect())
408                .collect(),
409        ),
410        L::Date(d) => JsonValue::Date(d.to_string()),
411        L::DateTime(dt) => JsonValue::DateTime(dt.to_string()),
412        L::Time(t) => JsonValue::Time(t.to_string()),
413        L::Duration(dur) => JsonValue::Duration(dur.num_seconds()),
414        L::Error(e) => JsonValue::Error(e.kind.to_string()),
415        L::Pending => JsonValue::Pending,
416    }
417}
418
419fn json_to_literal(v: &JsonValue) -> formualizer_common::LiteralValue {
420    use formualizer_common::LiteralValue as L;
421    match v {
422        JsonValue::Int(i) => L::Int(*i),
423        JsonValue::Number(n) => L::Number(*n),
424        JsonValue::Text(s) => L::Text(s.clone()),
425        JsonValue::Boolean(b) => L::Boolean(*b),
426        JsonValue::Empty => L::Empty,
427        JsonValue::Array(arr) => L::Array(
428            arr.iter()
429                .map(|row| row.iter().map(json_to_literal).collect())
430                .collect(),
431        ),
432        JsonValue::Date(s) => {
433            let d = chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d")
434                .unwrap_or_else(|_| chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap());
435            L::Date(d)
436        }
437        JsonValue::DateTime(s) => {
438            let dt = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S")
439                .or_else(|_| chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S"))
440                .unwrap_or_else(|_| {
441                    chrono::NaiveDate::from_ymd_opt(1970, 1, 1)
442                        .unwrap()
443                        .and_hms_opt(0, 0, 0)
444                        .unwrap()
445                });
446            L::DateTime(dt)
447        }
448        JsonValue::Time(s) => {
449            let t = chrono::NaiveTime::parse_from_str(s, "%H:%M:%S")
450                .unwrap_or_else(|_| chrono::NaiveTime::from_hms_opt(0, 0, 0).unwrap());
451            L::Time(t)
452        }
453        JsonValue::Duration(secs) => L::Duration(chrono::Duration::seconds(*secs)),
454        JsonValue::Error(code) => L::Error(
455            formualizer_common::error::ExcelError::from_error_string(code),
456        ),
457        JsonValue::Pending => L::Pending,
458    }
459}
460
461// Stream JSON workbook contents into the evaluation engine (values + formulas),
462// and propagate the workbook date system to the engine config.
463impl<R> formualizer_eval::engine::ingest::EngineLoadStream<R> for JsonAdapter
464where
465    R: formualizer_eval::traits::EvaluationContext,
466{
467    type Error = IoError;
468
469    fn stream_into_engine(
470        &mut self,
471        engine: &mut formualizer_eval::engine::Engine<R>,
472    ) -> Result<(), Self::Error> {
473        // Propagate date system: if any sheet declares 1904, treat workbook as 1904
474        let any_1904 = self.data.sheets.values().any(|s| s.date_system_1904);
475        engine.config.date_system = if any_1904 {
476            formualizer_eval::engine::DateSystem::Excel1904
477        } else {
478            formualizer_eval::engine::DateSystem::Excel1900
479        };
480
481        // Ensure all sheets exist in the graph first
482        for name in self.data.sheets.keys() {
483            engine
484                .add_sheet(name)
485                .map_err(|e| IoError::from_backend("json", e))?;
486        }
487
488        // Declare external sources (SourceVertex) before ingesting formulas.
489        for src in &self.data.sources {
490            match src {
491                JsonSource::Scalar { name, version } => engine
492                    .define_source_scalar(name, *version)
493                    .map_err(|e| IoError::from_backend("json", e))?,
494                JsonSource::Table { name, version } => {
495                    engine
496                        .define_source_table(name, *version)
497                        .map_err(|e| IoError::from_backend("json", e))?
498                }
499            }
500        }
501
502        // Ingest values via Arrow IngestBuilder per sheet
503        let chunk_rows: usize = 32 * 1024;
504        for (name, sheet) in self.data.sheets.iter() {
505            let dims = sheet.dimensions.unwrap_or_else(|| {
506                let mut max_r = 0u32;
507                let mut max_c = 0u32;
508                for c in &sheet.cells {
509                    if c.row > max_r {
510                        max_r = c.row;
511                    }
512                    if c.col > max_c {
513                        max_c = c.col;
514                    }
515                }
516                (max_r, max_c)
517            });
518            let rows = dims.0 as usize;
519            let cols = dims.1 as usize;
520
521            let mut aib = formualizer_eval::arrow_store::IngestBuilder::new(
522                name,
523                cols,
524                chunk_rows,
525                engine.config.date_system,
526            );
527            // Build a map for quick lookup
528            let mut cell_map: BTreeMap<(u32, u32), &JsonCell> = BTreeMap::new();
529            for c in &sheet.cells {
530                cell_map.insert((c.row, c.col), c);
531            }
532            for r in 1..=rows {
533                let mut row_vals: Vec<formualizer_common::LiteralValue> =
534                    vec![formualizer_common::LiteralValue::Empty; cols];
535                for c in 1..=cols {
536                    if let Some(cell) = cell_map.get(&(r as u32, c as u32))
537                        && let Some(v) = &cell.value
538                    {
539                        row_vals[c - 1] = json_to_literal(v);
540                    }
541                }
542                aib.append_row(&row_vals)
543                    .map_err(|e| IoError::from_backend("json", e))?;
544            }
545            let asheet = aib.finish();
546            let store = engine.sheet_store_mut();
547            if let Some(pos) = store.sheets.iter().position(|s| s.name.as_ref() == name) {
548                store.sheets[pos] = asheet;
549            } else {
550                store.sheets.push(asheet);
551            }
552
553            // Register native table metadata before formula ingest.
554            if let Some(sheet_id) = engine.sheet_id(name) {
555                for table in &sheet.tables {
556                    let (sr, sc, er, ec) = table.range;
557                    let sr0 = sr.saturating_sub(1);
558                    let sc0 = sc.saturating_sub(1);
559                    let er0 = er.saturating_sub(1);
560                    let ec0 = ec.saturating_sub(1);
561                    let start_ref = formualizer_eval::reference::CellRef::new(
562                        sheet_id,
563                        formualizer_eval::reference::Coord::new(sr0, sc0, true, true),
564                    );
565                    let end_ref = formualizer_eval::reference::CellRef::new(
566                        sheet_id,
567                        formualizer_eval::reference::Coord::new(er0, ec0, true, true),
568                    );
569                    let range_ref = formualizer_eval::reference::RangeRef::new(start_ref, end_ref);
570                    engine.define_table(
571                        &table.name,
572                        range_ref,
573                        table.headers.clone(),
574                        table.totals_row,
575                    )?;
576                }
577            }
578
579            // Formulas: either stage into graph now or defer
580            if engine.config.defer_graph_building {
581                for c in &sheet.cells {
582                    if let Some(f) = &c.formula {
583                        if f.is_empty() {
584                            continue;
585                        }
586                        engine.stage_formula_text(name, c.row, c.col, f.clone());
587                    }
588                }
589            } else {
590                let mut builder = engine.begin_bulk_ingest();
591                let sid = builder.add_sheet(name);
592                for c in &sheet.cells {
593                    if let Some(f) = &c.formula {
594                        if f.is_empty() {
595                            continue;
596                        }
597                        let with_eq = if f.starts_with('=') {
598                            f.clone()
599                        } else {
600                            format!("={f}")
601                        };
602                        let parsed = formualizer_parse::parser::parse(&with_eq)
603                            .map_err(|e| IoError::from_backend("json", e))?;
604                        builder.add_formulas(sid, std::iter::once((c.row, c.col, parsed)));
605                    }
606                }
607                let _ = builder.finish();
608            }
609        }
610
611        // Register named ranges (metadata only) into the dependency graph.
612        {
613            use formualizer_eval::engine::named_range::{NameScope, NamedDefinition};
614            use formualizer_eval::reference::{CellRef, Coord};
615            use rustc_hash::FxHashSet;
616
617            let mut seen: FxHashSet<(crate::traits::NamedRangeScope, String, String)> =
618                FxHashSet::default();
619
620            for (sheet_name, sheet) in self.data.sheets.iter() {
621                for named in &sheet.named_ranges {
622                    if named.address.sheet != *sheet_name {
623                        continue;
624                    }
625
626                    let key = (
627                        named.scope.clone(),
628                        named.address.sheet.clone(),
629                        named.name.clone(),
630                    );
631                    if !seen.insert(key) {
632                        continue;
633                    }
634
635                    let Some(sheet_id) = engine.sheet_id(&named.address.sheet) else {
636                        continue;
637                    };
638
639                    let addr = &named.address;
640                    let sr0 = addr.start_row.saturating_sub(1);
641                    let sc0 = addr.start_col.saturating_sub(1);
642                    let er0 = addr.end_row.saturating_sub(1);
643                    let ec0 = addr.end_col.saturating_sub(1);
644
645                    let start_coord = Coord::new(sr0, sc0, true, true);
646                    let end_coord = Coord::new(er0, ec0, true, true);
647                    let start_ref = CellRef::new(sheet_id, start_coord);
648                    let end_ref = CellRef::new(sheet_id, end_coord);
649
650                    let definition = if sr0 == er0 && sc0 == ec0 {
651                        NamedDefinition::Cell(start_ref)
652                    } else {
653                        let range_ref =
654                            formualizer_eval::reference::RangeRef::new(start_ref, end_ref);
655                        NamedDefinition::Range(range_ref)
656                    };
657
658                    let scope = match named.scope {
659                        crate::traits::NamedRangeScope::Workbook => NameScope::Workbook,
660                        crate::traits::NamedRangeScope::Sheet => NameScope::Sheet(sheet_id),
661                    };
662
663                    engine.define_name(&named.name, definition, scope)?;
664                }
665            }
666        }
667
668        // Finalize sheet indexes after load
669        for name in self.data.sheets.keys() {
670            engine.finalize_sheet_index(name);
671        }
672        engine.set_first_load_assume_new(false);
673        engine.reset_ensure_touched();
674        Ok(())
675    }
676}