Skip to main content

cortex_runtime/acquisition/
canvas_extractor.rs

//! Three-tier extraction for canvas and WebGL applications.
//!
//! Canvas apps (spreadsheets, design tools, maps) render to `<canvas>` elements,
//! so traditional DOM-based content extraction sees little or nothing. This
//! module provides three strategies to extract structured data instead:
//!
//! 1. **Known App APIs** — For apps like Google Sheets, Figma, etc., fetch data
//!    directly via their REST API. No browser required. Highest reliability.
//! 2. **Accessibility Tree** — Read the page's accessibility layer (ARIA roles,
//!    labels, and grid markup). The current implementation queries ARIA
//!    attributes with injected JavaScript rather than calling CDP
//!    `Accessibility.getFullAXTree()`. Requires one browser render but reads
//!    structured data, not pixels.
//! 3. **App State Extraction** — Access the application's JavaScript state
//!    (`window.__INITIAL_STATE__` and similar globals, or a Redux store exposed
//!    on `window`). React fiber tree inspection is not implemented yet.
//!    Requires one browser render.
15
16use crate::acquisition::http_client::HttpClient;
17use crate::acquisition::http_session::HttpSession;
18use crate::renderer::RenderContext;
19use anyhow::Result;
20use serde::{Deserialize, Serialize};
21use std::sync::OnceLock;
22
23/// Embedded known canvas app API configuration.
24const KNOWN_CANVAS_APIS_JSON: &str = include_str!("known_canvas_apis.json");
25
26/// The type of canvas application detected.
27#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
28pub enum CanvasAppType {
29    Spreadsheet,
30    DesignTool,
31    Map,
32    Whiteboard,
33    Game,
34    Diagram,
35    Unknown,
36}
37
38/// Structured data extracted from a grid-based canvas app (e.g., spreadsheet).
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct GridData {
41    /// Number of rows discovered.
42    pub rows: u32,
43    /// Number of columns discovered.
44    pub cols: u32,
45    /// Cell data: (row, col, value).
46    pub cells: Vec<(u32, u32, String)>,
47    /// Column headers, if any.
48    pub headers: Vec<String>,
49}
50
51/// A layer in a design/whiteboard tool.
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct Layer {
54    /// Layer name or ID.
55    pub name: String,
56    /// Whether the layer is visible.
57    pub visible: bool,
58    /// Child elements in this layer.
59    pub children: Vec<CanvasElement>,
60}
61
62/// An interactive element discovered on a canvas.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct CanvasElement {
65    /// Element label or accessible name.
66    pub label: String,
67    /// Role (button, textbox, cell, image, etc.).
68    pub role: String,
69    /// Bounding box: (x, y, width, height).
70    pub bounds: Option<(f32, f32, f32, f32)>,
71    /// Associated action URL or JS function, if any.
72    pub action: Option<String>,
73}
74
75/// Complete state extracted from a canvas application.
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct CanvasState {
78    /// The type of canvas app detected.
79    pub app_type: CanvasAppType,
80    /// Grid data for spreadsheet-like apps.
81    pub grid: Option<GridData>,
82    /// Layer hierarchy for design tools.
83    pub layers: Option<Vec<Layer>>,
84    /// All visible text content: (text, x, y).
85    pub text_content: Vec<(String, f32, f32)>,
86    /// Interactive elements (buttons, inputs, cells).
87    pub interactive_elements: Vec<CanvasElement>,
88    /// Raw application state as JSON, if available.
89    pub raw_state: Option<serde_json::Value>,
90    /// Which tier successfully extracted data.
91    pub extraction_tier: ExtractionTier,
92}
93
94/// Which extraction strategy produced the data.
95#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
96pub enum ExtractionTier {
97    /// Tier 1: Known app REST API.
98    KnownApi,
99    /// Tier 2: Browser accessibility tree.
100    AccessibilityTree,
101    /// Tier 3: JavaScript app state.
102    AppState,
103    /// No extraction succeeded.
104    None,
105}
106
107// ── Known API configuration types ───────────────────────────────────────────
108
109#[derive(Debug, Clone, Deserialize)]
110struct KnownCanvasApi {
111    data_api: String,
112    #[allow(dead_code)]
113    edit_api: Option<String>,
114    #[allow(dead_code)]
115    auth: Option<String>,
116    format: String,
117    app_type: String,
118}
119
120type CanvasApiRegistry = std::collections::HashMap<String, KnownCanvasApi>;
121
122fn canvas_api_registry() -> &'static CanvasApiRegistry {
123    static REGISTRY: OnceLock<CanvasApiRegistry> = OnceLock::new();
124    REGISTRY.get_or_init(|| serde_json::from_str(KNOWN_CANVAS_APIS_JSON).unwrap_or_default())
125}
126
127// ── Tier 1: Known App APIs ──────────────────────────────────────────────────
128
129/// Extract canvas state via a known REST API (Tier 1).
130///
131/// Checks the URL against known canvas apps (Google Sheets, Figma, etc.).
132/// If matched, fetches data directly via HTTP. Zero browser overhead.
133///
134/// Returns `None` if the URL does not match any known canvas app.
135pub async fn extract_via_known_api(
136    url: &str,
137    _session: Option<&HttpSession>,
138    client: &HttpClient,
139) -> Option<CanvasState> {
140    let registry = canvas_api_registry();
141
142    // Find matching platform by checking URL prefixes
143    let matching_config = registry
144        .iter()
145        .find(|(domain_prefix, _)| url.contains(domain_prefix.as_str()));
146
147    let (domain_key, config) = matching_config?;
148
149    // Build the API URL
150    let api_url = if config.data_api.starts_with("http") {
151        config.data_api.clone()
152    } else {
153        // Resolve relative to the page URL
154        let base = url.split('?').next().unwrap_or(url);
155        format!("{}{}", base.trim_end_matches('/'), config.data_api)
156    };
157
158    // Make the HTTP request
159    let timeout = 10000;
160    let resp = client.get(&api_url, timeout).await.ok()?;
161
162    if resp.status != 200 {
163        return None;
164    }
165
166    let app_type = match config.app_type.as_str() {
167        "spreadsheet" => CanvasAppType::Spreadsheet,
168        "design" => CanvasAppType::DesignTool,
169        "map" => CanvasAppType::Map,
170        "whiteboard" => CanvasAppType::Whiteboard,
171        "diagram" => CanvasAppType::Diagram,
172        _ => CanvasAppType::Unknown,
173    };
174
175    // Parse the response based on format
176    let raw_state: Option<serde_json::Value> = if config.format == "json" {
177        serde_json::from_str(&resp.body).ok()
178    } else {
179        None
180    };
181
182    // Try to extract grid data from JSON (for spreadsheets)
183    let grid = if app_type == CanvasAppType::Spreadsheet {
184        extract_grid_from_json(raw_state.as_ref())
185    } else {
186        None
187    };
188
189    // Try to extract layers (for design tools)
190    let layers = if app_type == CanvasAppType::DesignTool {
191        extract_layers_from_json(raw_state.as_ref())
192    } else {
193        None
194    };
195
196    tracing::info!(
197        "Tier 1: extracted canvas state for {} via known API ({})",
198        domain_key,
199        config.app_type
200    );
201
202    Some(CanvasState {
203        app_type,
204        grid,
205        layers,
206        text_content: Vec::new(),
207        interactive_elements: Vec::new(),
208        raw_state,
209        extraction_tier: ExtractionTier::KnownApi,
210    })
211}
212
213// ── Tier 2: Accessibility Tree ──────────────────────────────────────────────
214
215/// Extract canvas state from the browser's accessibility tree (Tier 2).
216///
217/// Uses `Accessibility.getFullAXTree()` via CDP to read structured data
218/// from the accessibility layer. This works for apps that properly
219/// implement ARIA attributes.
220///
221/// Requires a browser context but reads structured data, not pixels.
222pub async fn extract_via_accessibility(context: &dyn RenderContext) -> Option<CanvasState> {
223    // Execute JS to read accessibility information
224    let js = r#"
225    (() => {
226        const result = { elements: [], text: [] };
227
228        // Gather all elements with ARIA roles
229        const all = document.querySelectorAll('[role], [aria-label], [aria-valuetext]');
230        for (const el of all) {
231            const rect = el.getBoundingClientRect();
232            const entry = {
233                role: el.getAttribute('role') || el.tagName.toLowerCase(),
234                label: el.getAttribute('aria-label') || el.textContent?.trim()?.substring(0, 200) || '',
235                x: rect.x, y: rect.y, w: rect.width, h: rect.height,
236                action: el.getAttribute('href') || el.getAttribute('data-action') || null
237            };
238            if (entry.label && rect.width > 0 && rect.height > 0) {
239                result.elements.push(entry);
240            }
241        }
242
243        // Gather text from canvas-adjacent elements
244        const textEls = document.querySelectorAll('canvas ~ *, canvas + *, [aria-live]');
245        for (const el of textEls) {
246            const text = el.textContent?.trim();
247            if (text && text.length > 0 && text.length < 1000) {
248                const rect = el.getBoundingClientRect();
249                result.text.push({ text, x: rect.x, y: rect.y });
250            }
251        }
252
253        // Check for grid/table ARIA patterns
254        const grids = document.querySelectorAll('[role="grid"], [role="table"], [role="spreadsheet"]');
255        if (grids.length > 0) {
256            const grid = grids[0];
257            const rows = grid.querySelectorAll('[role="row"]');
258            const gridData = { rows: rows.length, cols: 0, cells: [], headers: [] };
259            rows.forEach((row, ri) => {
260                const cells = row.querySelectorAll('[role="gridcell"], [role="columnheader"], [role="cell"]');
261                gridData.cols = Math.max(gridData.cols, cells.length);
262                cells.forEach((cell, ci) => {
263                    const text = cell.textContent?.trim() || '';
264                    if (cell.getAttribute('role') === 'columnheader') {
265                        gridData.headers.push(text);
266                    }
267                    if (text) {
268                        gridData.cells.push([ri, ci, text]);
269                    }
270                });
271            });
272            result.grid = gridData;
273        }
274
275        return JSON.stringify(result);
276    })()
277    "#;
278
279    let js_result = context.execute_js(js).await.ok()?;
280    let result_str = js_result.as_str()?;
281    let parsed: serde_json::Value = serde_json::from_str(result_str).ok()?;
282
283    let elements: Vec<CanvasElement> = parsed
284        .get("elements")
285        .and_then(|v| v.as_array())
286        .map(|arr| {
287            arr.iter()
288                .filter_map(|el| {
289                    let label = el.get("label")?.as_str()?.to_string();
290                    let role = el.get("role")?.as_str()?.to_string();
291                    let x = el.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0) as f32;
292                    let y = el.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0) as f32;
293                    let w = el.get("w").and_then(|v| v.as_f64()).unwrap_or(0.0) as f32;
294                    let h = el.get("h").and_then(|v| v.as_f64()).unwrap_or(0.0) as f32;
295                    let action = el.get("action").and_then(|v| v.as_str()).map(String::from);
296                    Some(CanvasElement {
297                        label,
298                        role,
299                        bounds: Some((x, y, w, h)),
300                        action,
301                    })
302                })
303                .collect()
304        })
305        .unwrap_or_default();
306
307    let text_content: Vec<(String, f32, f32)> = parsed
308        .get("text")
309        .and_then(|v| v.as_array())
310        .map(|arr| {
311            arr.iter()
312                .filter_map(|t| {
313                    let text = t.get("text")?.as_str()?.to_string();
314                    let x = t.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0) as f32;
315                    let y = t.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0) as f32;
316                    Some((text, x, y))
317                })
318                .collect()
319        })
320        .unwrap_or_default();
321
322    // Extract grid data if present
323    let grid = parsed.get("grid").and_then(|g| {
324        let rows = g.get("rows")?.as_u64()? as u32;
325        let cols = g.get("cols")?.as_u64()? as u32;
326        let cells: Vec<(u32, u32, String)> = g
327            .get("cells")?
328            .as_array()?
329            .iter()
330            .filter_map(|c| {
331                let arr = c.as_array()?;
332                let r = arr.first()?.as_u64()? as u32;
333                let c_idx = arr.get(1)?.as_u64()? as u32;
334                let val = arr.get(2)?.as_str()?.to_string();
335                Some((r, c_idx, val))
336            })
337            .collect();
338        let headers: Vec<String> = g
339            .get("headers")?
340            .as_array()?
341            .iter()
342            .filter_map(|h| h.as_str().map(String::from))
343            .collect();
344        Some(GridData {
345            rows,
346            cols,
347            cells,
348            headers,
349        })
350    });
351
352    let app_type = if grid.is_some() {
353        CanvasAppType::Spreadsheet
354    } else if !elements.is_empty() {
355        CanvasAppType::Unknown
356    } else {
357        return None; // No useful data extracted
358    };
359
360    tracing::info!(
361        "Tier 2: extracted {} elements + {} text entries from accessibility tree",
362        elements.len(),
363        text_content.len()
364    );
365
366    Some(CanvasState {
367        app_type,
368        grid,
369        layers: None,
370        text_content,
371        interactive_elements: elements,
372        raw_state: None,
373        extraction_tier: ExtractionTier::AccessibilityTree,
374    })
375}
376
377// ── Tier 3: App State Extraction ────────────────────────────────────────────
378
379/// Extract canvas state from the application's JavaScript state (Tier 3).
380///
381/// Tries to access global state objects commonly used by modern web apps:
382/// - `window.__INITIAL_STATE__`
383/// - `window.__NEXT_DATA__`
384/// - `window.__NUXT__`
385/// - Redux store
386/// - React fiber tree
387pub async fn extract_via_app_state(context: &dyn RenderContext) -> Option<CanvasState> {
388    let js = r#"
389    (() => {
390        // Try common state objects
391        const candidates = [
392            window.__INITIAL_STATE__,
393            window.__NEXT_DATA__,
394            window.__NUXT__,
395            window.__APP_STATE__,
396            window.__PRELOADED_STATE__,
397        ];
398
399        for (const state of candidates) {
400            if (state && typeof state === 'object') {
401                try {
402                    const json = JSON.stringify(state);
403                    if (json.length > 10 && json.length < 5000000) {
404                        return json;
405                    }
406                } catch(e) {}
407            }
408        }
409
410        // Try Redux store
411        try {
412            if (window.__REDUX_DEVTOOLS_EXTENSION__ || window.__store__) {
413                const store = window.__store__ || document.querySelector('[data-reactroot]')?.__store__;
414                if (store && typeof store.getState === 'function') {
415                    const state = store.getState();
416                    const json = JSON.stringify(state);
417                    if (json.length > 10 && json.length < 5000000) {
418                        return json;
419                    }
420                }
421            }
422        } catch(e) {}
423
424        return null;
425    })()
426    "#;
427
428    let js_result = context.execute_js(js).await.ok()?;
429    let result_str = js_result.as_str()?;
430    let raw_state: serde_json::Value = serde_json::from_str(result_str).ok()?;
431
432    // Try to classify the app type from the state structure
433    let app_type = classify_app_from_state(&raw_state);
434    let grid = extract_grid_from_json(Some(&raw_state));
435    let layers = extract_layers_from_json(Some(&raw_state));
436
437    tracing::info!("Tier 3: extracted app state ({:?})", app_type);
438
439    Some(CanvasState {
440        app_type,
441        grid,
442        layers,
443        text_content: Vec::new(),
444        interactive_elements: Vec::new(),
445        raw_state: Some(raw_state),
446        extraction_tier: ExtractionTier::AppState,
447    })
448}
449
450// ── Detect if a page is a canvas app ────────────────────────────────────────
451
/// Check if a page is likely a canvas/WebGL application.
///
/// A quick, browser-free heuristic over the raw HTML source. Returns `true`
/// when the source contains either:
/// - a `<canvas` tag, matched ASCII case-insensitively because HTML tag names
///   are case-insensitive (`<CANVAS>` is valid markup); or
/// - a script marker for a canvas context: `getContext(...)` with `2d`,
///   `webgl`, `webgl2`, or `experimental-webgl` (single or double quotes), or a
///   reference to `WebGLRenderingContext` / `WebGL2RenderingContext`.
///
/// An empty string yields `false`.
pub fn is_canvas_app(html: &str) -> bool {
    const CANVAS_TAG: &[u8] = b"<canvas";
    const SCRIPT_MARKERS: [&str; 10] = [
        "getContext('webgl')",
        "getContext(\"webgl\")",
        "getContext('2d')",
        "getContext(\"2d\")",
        "WebGLRenderingContext",
        "getContext('webgl2')",
        "getContext(\"webgl2\")",
        "getContext('experimental-webgl')",
        "getContext(\"experimental-webgl\")",
        "WebGL2RenderingContext",
    ];

    // Byte-window scan avoids allocating a lowercased copy of the whole document.
    let has_canvas_tag = html
        .as_bytes()
        .windows(CANVAS_TAG.len())
        .any(|window| window.eq_ignore_ascii_case(CANVAS_TAG));

    has_canvas_tag || SCRIPT_MARKERS.iter().any(|marker| html.contains(*marker))
}
464
465// ── Private helpers ─────────────────────────────────────────────────────────
466
467/// Classify app type from JavaScript state structure.
468fn classify_app_from_state(state: &serde_json::Value) -> CanvasAppType {
469    let state_str = state.to_string().to_lowercase();
470
471    if state_str.contains("spreadsheet")
472        || (state_str.contains("\"cells\"")
473            || (state_str.contains("\"rows\"") && state_str.contains("\"columns\"")))
474    {
475        CanvasAppType::Spreadsheet
476    } else if state_str.contains("\"layers\"")
477        || (state_str.contains("\"canvas\"") && state_str.contains("\"frames\""))
478    {
479        CanvasAppType::DesignTool
480    } else if (state_str.contains("\"lat\"") && state_str.contains("\"lng\""))
481        || state_str.contains("\"latitude\"")
482    {
483        CanvasAppType::Map
484    } else if state_str.contains("\"whiteboard\"")
485        || (state_str.contains("\"board\"") && state_str.contains("\"shapes\""))
486    {
487        CanvasAppType::Whiteboard
488    } else {
489        CanvasAppType::Unknown
490    }
491}
492
493/// Try to extract grid data from a JSON state object.
494fn extract_grid_from_json(state: Option<&serde_json::Value>) -> Option<GridData> {
495    let state = state?;
496
497    // Look for common grid patterns
498    // Pattern 1: { cells: { "A1": { value: "..." }, ... } }
499    if let Some(cells_obj) = state.get("cells").and_then(|v| v.as_object()) {
500        let mut cells = Vec::new();
501        let mut max_row = 0u32;
502        let mut max_col = 0u32;
503        let mut headers = Vec::new();
504
505        for (key, val) in cells_obj {
506            if let Some((row, col)) = parse_cell_ref(key) {
507                let value = val
508                    .get("value")
509                    .or_else(|| val.get("v"))
510                    .and_then(|v| {
511                        if v.is_string() {
512                            v.as_str().map(String::from)
513                        } else {
514                            Some(v.to_string())
515                        }
516                    })
517                    .unwrap_or_default();
518                if !value.is_empty() {
519                    cells.push((row, col, value.clone()));
520                    max_row = max_row.max(row);
521                    max_col = max_col.max(col);
522                    if row == 0 {
523                        headers.push(value);
524                    }
525                }
526            }
527        }
528
529        if !cells.is_empty() {
530            return Some(GridData {
531                rows: max_row + 1,
532                cols: max_col + 1,
533                cells,
534                headers,
535            });
536        }
537    }
538
539    // Pattern 2: { rows: [ { cells: [ { value: "..." } ] } ] }
540    if let Some(rows_arr) = state.get("rows").and_then(|v| v.as_array()) {
541        let mut cells = Vec::new();
542        let mut headers = Vec::new();
543        let mut max_col = 0u32;
544
545        for (ri, row) in rows_arr.iter().enumerate() {
546            if let Some(row_cells) = row.get("cells").and_then(|v| v.as_array()) {
547                for (ci, cell) in row_cells.iter().enumerate() {
548                    let value = cell
549                        .get("value")
550                        .or_else(|| cell.get("v"))
551                        .and_then(|v| {
552                            if v.is_string() {
553                                v.as_str().map(String::from)
554                            } else {
555                                Some(v.to_string())
556                            }
557                        })
558                        .unwrap_or_default();
559                    if !value.is_empty() {
560                        cells.push((ri as u32, ci as u32, value.clone()));
561                        max_col = max_col.max(ci as u32);
562                        if ri == 0 {
563                            headers.push(value);
564                        }
565                    }
566                }
567            }
568        }
569
570        if !cells.is_empty() {
571            return Some(GridData {
572                rows: rows_arr.len() as u32,
573                cols: max_col + 1,
574                cells,
575                headers,
576            });
577        }
578    }
579
580    None
581}
582
/// Parse a spreadsheet cell reference like "A1" into zero-based `(row, col)`.
///
/// A valid reference is one or more ASCII letters (case-insensitive) followed
/// by one or more ASCII digits and nothing else. Columns use bijective base-26
/// (`A`=0, ..., `Z`=25, `AA`=26, ...); rows are one-based in the reference.
///
/// Returns `None` for:
/// - malformed input — empty, letters only, digits only, digits before letters
///   (`"1A"`), interleaved parts (`"A1B2"`), or any other character;
/// - row `0`, which has no zero-based equivalent;
/// - a row or column too large for `u32` (instead of overflowing).
fn parse_cell_ref(cell_ref: &str) -> Option<(u32, u32)> {
    // Split at the first non-letter; everything after it must be digits.
    let split_at = cell_ref
        .find(|ch: char| !ch.is_ascii_alphabetic())
        .unwrap_or(cell_ref.len());
    let (letters, digits) = cell_ref.split_at(split_at);

    if letters.is_empty() || digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    // Accumulate the one-based column with checked arithmetic: cell keys come
    // from untrusted JSON, and a long run of letters would overflow `u32`.
    let col_one_based = letters.bytes().try_fold(0u32, |acc, byte| {
        let digit = u32::from(byte.to_ascii_uppercase() - b'A') + 1;
        acc.checked_mul(26)?.checked_add(digit)
    })?;

    let row_one_based: u32 = digits.parse().ok()?;

    // `letters` is non-empty, so `col_one_based >= 1` and the subtraction cannot underflow.
    Some((row_one_based.checked_sub(1)?, col_one_based - 1))
}
613
614/// Try to extract layer data from a JSON state object.
615fn extract_layers_from_json(state: Option<&serde_json::Value>) -> Option<Vec<Layer>> {
616    let state = state?;
617
618    let layers_arr = state
619        .get("layers")
620        .or_else(|| state.get("document").and_then(|d| d.get("layers")))
621        .or_else(|| state.get("children"))
622        .and_then(|v| v.as_array())?;
623
624    let layers: Vec<Layer> = layers_arr
625        .iter()
626        .filter_map(|l| {
627            let name = l
628                .get("name")
629                .or_else(|| l.get("id"))
630                .and_then(|v| v.as_str())
631                .map(String::from)?;
632            let visible = l.get("visible").and_then(|v| v.as_bool()).unwrap_or(true);
633            let children = l
634                .get("children")
635                .and_then(|v| v.as_array())
636                .map(|arr| {
637                    arr.iter()
638                        .filter_map(|c| {
639                            let label = c
640                                .get("name")
641                                .or_else(|| c.get("id"))
642                                .and_then(|v| v.as_str())
643                                .map(String::from)?;
644                            Some(CanvasElement {
645                                label,
646                                role: c
647                                    .get("type")
648                                    .and_then(|v| v.as_str())
649                                    .unwrap_or("unknown")
650                                    .to_string(),
651                                bounds: None,
652                                action: None,
653                            })
654                        })
655                        .collect()
656                })
657                .unwrap_or_default();
658            Some(Layer {
659                name,
660                visible,
661                children,
662            })
663        })
664        .collect();
665
666    if layers.is_empty() {
667        None
668    } else {
669        Some(layers)
670    }
671}
672
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts the shape shared by both 2x2 "Name/Price" grid fixtures.
    fn assert_two_by_two(grid: &GridData) {
        assert_eq!(grid.rows, 2);
        assert_eq!(grid.cols, 2);
        assert_eq!(grid.cells.len(), 4);
    }

    #[test]
    fn test_is_canvas_app() {
        let positives = [
            "<html><body><canvas id='main'></canvas></body></html>",
            "var ctx = el.getContext('2d');",
        ];
        for html in positives.iter() {
            assert!(is_canvas_app(html));
        }
        assert!(!is_canvas_app("<html><body><h1>Hello</h1></body></html>"));
    }

    #[test]
    fn test_parse_cell_ref() {
        // (input, expected zero-based (row, col))
        let cases: [(&str, Option<(u32, u32)>); 7] = [
            ("A1", Some((0, 0))),
            ("B3", Some((2, 1))),
            ("Z1", Some((0, 25))),
            ("AA1", Some((0, 26))),
            ("", None),
            ("123", None),
            ("A", None),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(parse_cell_ref(input), *expected);
        }
    }

    #[test]
    fn test_extract_grid_from_json_cells_pattern() {
        let state = serde_json::json!({
            "cells": {
                "A1": {"value": "Name"},
                "B1": {"value": "Price"},
                "A2": {"value": "Widget"},
                "B2": {"value": "29.99"}
            }
        });
        let grid = extract_grid_from_json(Some(&state)).unwrap();
        assert_two_by_two(&grid);
    }

    #[test]
    fn test_extract_grid_from_json_rows_pattern() {
        let state = serde_json::json!({
            "rows": [
                {"cells": [{"value": "Name"}, {"value": "Price"}]},
                {"cells": [{"value": "Widget"}, {"value": "29.99"}]}
            ]
        });
        let grid = extract_grid_from_json(Some(&state)).unwrap();
        assert_two_by_two(&grid);
    }

    #[test]
    fn test_extract_layers_from_json() {
        let state = serde_json::json!({
            "layers": [
                {"name": "Background", "visible": true, "children": [
                    {"name": "Logo", "type": "image"}
                ]},
                {"name": "Content", "visible": true, "children": [
                    {"name": "Title", "type": "text"},
                    {"name": "Button", "type": "button"}
                ]}
            ]
        });
        let layers = extract_layers_from_json(Some(&state)).unwrap();
        assert_eq!(layers.len(), 2);
        assert_eq!(layers[0].name, "Background");

        let child_counts: Vec<usize> = layers.iter().map(|l| l.children.len()).collect();
        assert_eq!(child_counts, vec![1, 2]);
    }

    #[test]
    fn test_classify_app_from_state() {
        let fixtures = [
            (
                serde_json::json!({"cells": {}, "rows": [], "columns": []}),
                CanvasAppType::Spreadsheet,
            ),
            (
                serde_json::json!({"center": {"lat": 40.7, "lng": -74.0}}),
                CanvasAppType::Map,
            ),
            (serde_json::json!({"foo": "bar"}), CanvasAppType::Unknown),
        ];
        for (state, expected) in fixtures.iter() {
            assert_eq!(&classify_app_from_state(state), expected);
        }
    }

    #[test]
    fn test_empty_state() {
        assert!(extract_grid_from_json(None).is_none());
        assert!(extract_layers_from_json(None).is_none());
    }
}