ggsql 0.1.8

A declarative visualization language that extends SQL with powerful data visualization capabilities.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
/*!
# ggsql - SQL Visualization Grammar

A SQL extension for declarative data visualization based on the Grammar of Graphics.

ggsql allows you to write queries that combine SQL data retrieval with visualization
specifications in a single, composable syntax.

## Example

```sql
SELECT date, revenue, region
FROM sales
WHERE year = 2024
VISUALISE date AS x, revenue AS y, region AS color
DRAW line
LABEL title => 'Sales by Region'
THEME minimal
```

## Architecture

ggsql splits queries at the `VISUALISE` boundary:
- **SQL portion** → passed to pluggable readers (DuckDB, PostgreSQL, CSV, etc.)
- **VISUALISE portion** → parsed and compiled into visualization specifications
- **Output** → rendered via pluggable writers (ggplot2, PNG, Vega-Lite, etc.)

## Core Components

- [`parser`] - Query parsing and AST generation
- [`engine`] - Core execution engine
- [`readers`] - Data source abstraction layer
- [`writers`] - Output format abstraction layer
*/

// Allow complex types in test code (e.g., test case tuples with many elements)
#![cfg_attr(test, allow(clippy::type_complexity))]

pub mod format;
pub mod naming;
pub mod parser;
pub mod plot;

pub mod reader;

#[cfg(any(feature = "vegalite", feature = "ggplot2", feature = "plotters"))]
pub mod writer;

pub mod execute;

pub mod validate;

// Re-export key types for convenience
pub use plot::{
    AestheticValue, DataSource, Facet, FacetLayout, Geom, Layer, Mappings, Plot, Scale,
    SqlExpression,
};

// Re-export aesthetic classification utilities
pub use plot::aesthetic::{
    is_positional_aesthetic, AestheticContext, NON_POSITIONAL, POSITIONAL_SUFFIXES,
};

// Future modules - not yet implemented
// #[cfg(feature = "engine")]
// pub mod engine;

// DataFrame abstraction (wraps Polars)
pub use polars::prelude::DataFrame;

/// Main library error type
#[derive(thiserror::Error, Debug)]
pub enum GgsqlError {
    #[error("Parse error: {0}")]
    ParseError(String),

    #[error("Validation error: {0}")]
    ValidationError(String),

    #[error("Data source error: {0}")]
    ReaderError(String),

    #[error("Output generation error: {0}")]
    WriterError(String),

    #[error("Internal error: {0}")]
    InternalError(String),
}

/// Crate-wide result alias: `std::result::Result` specialized to [`GgsqlError`].
pub type Result<T> = std::result::Result<T, GgsqlError>;

/// Version information
///
/// The crate version string from `CargO.toml`'s `version` field, captured at
/// compile time via the `CARGO_PKG_VERSION` environment variable.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

#[cfg(test)]
#[cfg(all(feature = "duckdb", feature = "vegalite"))]
mod integration_tests {
    use super::*;
    use crate::plot::{AestheticValue, Geom, Layer};
    use crate::reader::{DuckDBReader, Reader};
    use crate::writer::{VegaLiteWriter, Writer};
    use std::collections::HashMap;

    /// Wrap a single DataFrame in a data map under the layer-0 key, as the
    /// writer tests expect.
    fn wrap_data(df: DataFrame) -> HashMap<String, DataFrame> {
        HashMap::from([(naming::layer_key(0), df)])
    }

    #[test]
    fn test_end_to_end_date_type_preservation() {
        // Full pipeline check: DuckDB → DataFrame (temporal dtype) → VegaLite ("temporal")

        // In-memory DuckDB connection with date data
        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        // SQL producing DATE-derived values
        let sql = r#"
            SELECT
                DATE '2024-01-01' + INTERVAL (n) DAY as date,
                n * 10 as revenue
            FROM generate_series(0, 4) as t(n)
        "#;
        let df = reader.execute_sql(sql).unwrap();

        // The date column must come back as a temporal dtype
        // (DuckDB yields Datetime for DATE + INTERVAL arithmetic, still temporal)
        assert_eq!(df.get_column_names(), vec!["date", "revenue"]);
        assert!(matches!(
            df.column("date").unwrap().dtype(),
            polars::prelude::DataType::Date | polars::prelude::DataType::Datetime(_, _)
        ));

        // Build a line plot mapping date → x, revenue → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::line())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("date".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("revenue".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // CRITICAL ASSERTION: x-axis should be automatically inferred as "temporal"
        assert_eq!(vl_spec["layer"][0]["encoding"]["x"]["type"], "temporal");
        assert_eq!(vl_spec["layer"][0]["encoding"]["y"]["type"], "quantitative");

        // Values should serialize as ISO temporal strings
        // (Datetime dtype → ISO datetime format in the emitted JSON)
        let rows = vl_spec["data"]["values"].as_array().unwrap();
        let date_str = rows[0]["date"].as_str().unwrap();
        assert!(
            date_str.starts_with("2024-01-01"),
            "Expected date starting with 2024-01-01, got {}",
            date_str
        );
    }

    #[test]
    fn test_end_to_end_datetime_type_preservation() {
        // Pipeline check: DuckDB TIMESTAMP → DataFrame Datetime → VegaLite "temporal"

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        // SQL producing TIMESTAMP values
        let sql = r#"
            SELECT
                TIMESTAMP '2024-01-01 00:00:00' + INTERVAL (n) HOUR as timestamp,
                n * 5 as value
            FROM generate_series(0, 3) as t(n)
        "#;
        let df = reader.execute_sql(sql).unwrap();

        // The timestamp column must be a Datetime dtype
        assert!(matches!(
            df.column("timestamp").unwrap().dtype(),
            polars::prelude::DataType::Datetime(_, _)
        ));

        // Build an area chart mapping timestamp → x, value → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::area())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("timestamp".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("value".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // Temporal inference on the x channel
        assert_eq!(vl_spec["layer"][0]["encoding"]["x"]["type"], "temporal");

        // Data values must serialize as ISO datetime strings
        let rows = vl_spec["data"]["values"].as_array().unwrap();
        assert!(rows[0]["timestamp"]
            .as_str()
            .unwrap()
            .starts_with("2024-01-01T"));
    }

    #[test]
    fn test_end_to_end_numeric_type_preservation() {
        // Numeric and boolean columns must stay typed end-to-end (never stringified)

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        // Real SQL that users would write
        let df = reader
            .execute_sql("SELECT 1 as int_col, 2.5 as float_col, true as bool_col")
            .unwrap();

        // dtype checks: Int32, Float64 (DuckDB DECIMAL literals convert to
        // Float64), and Boolean respectively
        assert!(matches!(
            df.column("int_col").unwrap().dtype(),
            polars::prelude::DataType::Int32
        ));
        assert!(matches!(
            df.column("float_col").unwrap().dtype(),
            polars::prelude::DataType::Float64
        ));
        assert!(matches!(
            df.column("bool_col").unwrap().dtype(),
            polars::prelude::DataType::Boolean
        ));

        // Build a scatter plot mapping int_col → x, float_col → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::point())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("int_col".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("float_col".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // Both channels should be inferred as quantitative
        assert_eq!(vl_spec["layer"][0]["encoding"]["x"]["type"], "quantitative");
        assert_eq!(vl_spec["layer"][0]["encoding"]["y"]["type"], "quantitative");

        // Data values should be JSON numbers/booleans (not strings!)
        let rows = vl_spec["data"]["values"].as_array().unwrap();
        assert_eq!(rows[0]["int_col"], 1);
        assert_eq!(rows[0]["float_col"], 2.5);
        assert_eq!(rows[0]["bool_col"], true);
    }

    #[test]
    fn test_end_to_end_mixed_types_with_nulls() {
        // NULLs must round-trip correctly across int, float, and string columns

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        let df = reader
            .execute_sql("SELECT * FROM (VALUES (1, 2.5, 'a'), (2, NULL, 'b'), (NULL, 3.5, NULL)) AS t(int_col, float_col, str_col)")
            .unwrap();

        // dtype checks for each column
        assert!(matches!(
            df.column("int_col").unwrap().dtype(),
            polars::prelude::DataType::Int32
        ));
        assert!(matches!(
            df.column("float_col").unwrap().dtype(),
            polars::prelude::DataType::Float64
        ));
        assert!(matches!(
            df.column("str_col").unwrap().dtype(),
            polars::prelude::DataType::String
        ));

        // Build a scatter plot mapping int_col → x, float_col → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::point())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("int_col".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("float_col".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // Non-null values survive; NULLs become JSON null
        let rows = vl_spec["data"]["values"].as_array().unwrap();
        assert_eq!(rows[0]["int_col"], 1);
        assert_eq!(rows[0]["float_col"], 2.5);
        assert_eq!(rows[1]["float_col"], serde_json::Value::Null);
        assert_eq!(rows[2]["int_col"], serde_json::Value::Null);
    }

    #[test]
    fn test_end_to_end_string_vs_categorical() {
        // String columns must be inferred as the nominal channel type

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        let df = reader
            .execute_sql("SELECT * FROM (VALUES ('A', 10), ('B', 20), ('A', 15), ('C', 30)) AS t(category, value)")
            .unwrap();

        // Build a bar chart mapping category → x, value → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::bar())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("category".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("value".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // category → nominal, value → quantitative
        assert_eq!(vl_spec["layer"][0]["encoding"]["x"]["type"], "nominal");
        assert_eq!(vl_spec["layer"][0]["encoding"]["y"]["type"], "quantitative");
    }

    #[test]
    fn test_end_to_end_time_series_aggregation() {
        // Realistic time-series query: temporal type must survive GROUP BY aggregation

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        // Sample hourly sales, aggregated to daily totals
        let sql = r#"
            WITH sales AS (
                SELECT
                    TIMESTAMP '2024-01-01 00:00:00' + INTERVAL (n) HOUR as sale_time,
                    (n % 3) as product_id,
                    10 + (n % 5) as amount
                FROM generate_series(0, 23) as t(n)
            )
            SELECT
                DATE_TRUNC('day', sale_time) as day,
                SUM(amount) as total_sales,
                COUNT(*) as num_sales
            FROM sales
            GROUP BY day
        "#;
        let df = reader.execute_sql(sql).unwrap();

        // The aggregated day column must still be a temporal dtype
        // (DATE_TRUNC may yield Date or Datetime depending on DuckDB version)
        assert!(matches!(
            df.column("day").unwrap().dtype(),
            polars::prelude::DataType::Date | polars::prelude::DataType::Datetime(_, _)
        ));

        // Build a line plot mapping day → x, total_sales → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::line())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("day".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("total_sales".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // x → temporal, y → quantitative
        assert_eq!(vl_spec["layer"][0]["encoding"]["x"]["type"], "temporal");
        assert_eq!(vl_spec["layer"][0]["encoding"]["y"]["type"], "quantitative");
    }

    #[test]
    fn test_end_to_end_decimal_precision() {
        // DECIMAL values at several precisions must convert to Float64 without loss

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        let df = reader
            .execute_sql("SELECT 0.1 as small, 123.456 as medium, 999999.999999 as large")
            .unwrap();

        // Every decimal column should land as Float64
        for col in ["small", "medium", "large"] {
            assert!(matches!(
                df.column(col).unwrap().dtype(),
                polars::prelude::DataType::Float64
            ));
        }

        // Build a scatter plot mapping small → x, medium → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::point())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("small".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("medium".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // Values survive the round-trip within floating-point tolerance
        let rows = vl_spec["data"]["values"].as_array().unwrap();
        let small_val = rows[0]["small"].as_f64().unwrap();
        let medium_val = rows[0]["medium"].as_f64().unwrap();
        let large_val = rows[0]["large"].as_f64().unwrap();

        assert!((small_val - 0.1).abs() < 0.001);
        assert!((medium_val - 123.456).abs() < 0.001);
        assert!((large_val - 999999.999999).abs() < 0.001);
    }

    #[test]
    fn test_end_to_end_integer_types() {
        // Each DuckDB integer width must map onto the matching Polars dtype

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        let df = reader
            .execute_sql("SELECT CAST(1 AS TINYINT) as tiny, CAST(1000 AS SMALLINT) as small, CAST(1000000 AS INTEGER) as int, CAST(1000000000000 AS BIGINT) as big")
            .unwrap();

        // TINYINT → Int8, SMALLINT → Int16, INTEGER → Int32, BIGINT → Int64
        assert!(matches!(
            df.column("tiny").unwrap().dtype(),
            polars::prelude::DataType::Int8
        ));
        assert!(matches!(
            df.column("small").unwrap().dtype(),
            polars::prelude::DataType::Int16
        ));
        assert!(matches!(
            df.column("int").unwrap().dtype(),
            polars::prelude::DataType::Int32
        ));
        assert!(matches!(
            df.column("big").unwrap().dtype(),
            polars::prelude::DataType::Int64
        ));

        // Build a bar chart mapping int → x, big → y
        let mut spec = Plot::new();
        spec.layers.push(
            Layer::new(Geom::bar())
                .with_aesthetic(
                    "x".to_string(),
                    AestheticValue::standard_column("int".to_string()),
                )
                .with_aesthetic(
                    "y".to_string(),
                    AestheticValue::standard_column("big".to_string()),
                ),
        );

        // Map user-facing aesthetics (x, y) onto internal names (pos1, pos2)
        spec.initialize_aesthetic_context();
        spec.transform_aesthetics_to_internal();

        // Render to Vega-Lite JSON
        let json_str = VegaLiteWriter::new().write(&spec, &wrap_data(df)).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // Every integer width should be inferred as quantitative
        assert_eq!(vl_spec["layer"][0]["encoding"]["x"]["type"], "quantitative");
        assert_eq!(vl_spec["layer"][0]["encoding"]["y"]["type"], "quantitative");

        // Values survive the round-trip as JSON numbers
        let rows = vl_spec["data"]["values"].as_array().unwrap();
        assert_eq!(rows[0]["tiny"], 1);
        assert_eq!(rows[0]["small"], 1000);
        assert_eq!(rows[0]["int"], 1000000);
        assert_eq!(rows[0]["big"], 1000000000000i64);
    }

    #[test]
    fn test_end_to_end_constant_mappings() {
        // Test that constant values in MAPPING clauses work correctly
        // Constants are injected as aesthetic-named columns in each layer's data
        // With unified data approach, all layers are merged into one dataset with source filtering

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        // Query with layer-level constants: each DRAW maps a string literal
        // onto a non-positional aesthetic (linetype / shape)
        let query = r#"
            SELECT 1 as x, 10 as y
            VISUALISE x, y
            DRAW line MAPPING 'value' AS linetype
            DRAW point MAPPING 'value2' AS shape
        "#;

        // Prepare data - this parses and processes the query
        let prepared = execute::prepare_data_with_reader(query, &reader).unwrap();

        // One plot spec, containing both DRAW layers
        // Each layer has its own data (different constants = different queries)
        assert_eq!(prepared.specs.len(), 1);

        // Layer 0 should have linetype column (injected from the 'value' constant)
        let layer0_key = prepared.specs[0].layers[0]
            .data_key
            .as_ref()
            .expect("Layer 0 should have data_key");
        let layer0_df = prepared.data.get(layer0_key).unwrap();
        let linetype_col = naming::aesthetic_column("linetype");
        let layer0_cols = layer0_df.get_column_names();
        assert!(
            layer0_cols.iter().any(|c| c.as_str() == linetype_col),
            "Layer 0 should have linetype column '{}': {:?}",
            linetype_col,
            layer0_cols
        );

        // Layer 1 should have shape column (injected from the 'value2' constant)
        let layer1_key = prepared.specs[0].layers[1]
            .data_key
            .as_ref()
            .expect("Layer 1 should have data_key");
        let layer1_df = prepared.data.get(layer1_key).unwrap();
        let shape_col = naming::aesthetic_column("shape");
        let layer1_cols = layer1_df.get_column_names();
        assert!(
            layer1_cols.iter().any(|c| c.as_str() == shape_col),
            "Layer 1 should have shape column '{}': {:?}",
            shape_col,
            layer1_cols
        );

        // Generate Vega-Lite
        let writer = VegaLiteWriter::new();
        let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // Verify we have two layers
        assert_eq!(vl_spec["layer"].as_array().unwrap().len(), 2);

        // Verify the aesthetic is mapped to prefixed aesthetic-named columns
        // Note: linetype is mapped to Vega-Lite's strokeDash channel
        let layer0_linetype = &vl_spec["layer"][0]["encoding"]["strokeDash"];
        let layer1_shape = &vl_spec["layer"][1]["encoding"]["shape"];

        assert_eq!(
            layer0_linetype["field"].as_str().unwrap(),
            linetype_col,
            "Layer 0 linetype should map to prefixed aesthetic-named column"
        );
        assert_eq!(
            layer1_shape["field"].as_str().unwrap(),
            shape_col,
            "Layer 1 shape should map to prefixed aesthetic-named column"
        );

        // With unified data approach, all data is in a single dataset
        // Each row has __ggsql_source__ identifying which layer's data it belongs to
        let global_data = &vl_spec["data"]["values"];
        assert!(
            global_data.is_array(),
            "Should have unified global data array"
        );
        let global_rows = global_data.as_array().unwrap();

        // Find rows for each layer by their source field
        let layer0_rows: Vec<_> = global_rows
            .iter()
            .filter(|r| r[naming::SOURCE_COLUMN] == layer0_key.as_str())
            .collect();
        let layer1_rows: Vec<_> = global_rows
            .iter()
            .filter(|r| r[naming::SOURCE_COLUMN] == layer1_key.as_str())
            .collect();

        assert!(!layer0_rows.is_empty(), "Should have layer 0 rows");
        assert!(!layer1_rows.is_empty(), "Should have layer 1 rows");

        // Verify constant values landed in the injected columns
        assert_eq!(
            layer0_rows[0][&linetype_col], "value",
            "Layer 0 linetype constant should be 'value'"
        );
        assert_eq!(
            layer1_rows[0][&shape_col], "value2",
            "Layer 1 shape constant should be 'value2'"
        );
    }

    #[test]
    fn test_end_to_end_facet_with_constant_strokes() {
        // Test faceting with multiple layers that have constant stroke mappings
        // This verifies the fix for faceting compatibility with constants

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        // Create test data with multiple groups for faceting
        // (executed directly on the underlying DuckDB connection so the table
        // persists for the query below)
        reader
            .connection()
            .execute(
                "CREATE TABLE facet_test AS SELECT * FROM (VALUES
                    ('2023-01-01'::DATE, 100.0, 50, 'North', 'A'),
                    ('2023-02-01'::DATE, 120.0, 60, 'North', 'A'),
                    ('2023-01-01'::DATE, 80.0, 40, 'South', 'B'),
                    ('2023-02-01'::DATE, 90.0, 45, 'South', 'B')
                ) AS t(month, revenue, quantity, region, category)",
                duckdb::params![],
            )
            .unwrap();

        // Query with multiple constant-colored layers and faceting:
        // four DRAW layers, each with its own constant stroke value
        let query = r#"
            SELECT month, region, category, revenue, quantity * 10 as qty_scaled
            FROM facet_test
            VISUALISE month AS x
            DRAW line MAPPING revenue AS y, 'value' AS stroke
            DRAW point MAPPING revenue AS y, 'value2' AS stroke SETTING size => 30
            DRAW line MAPPING qty_scaled AS y, 'value3' AS stroke
            DRAW point MAPPING qty_scaled AS y, 'value4' AS stroke SETTING size => 30
            FACET region BY category
        "#;

        let prepared = execute::prepare_data_with_reader(query, &reader).unwrap();

        // With aesthetic-named columns, each layer gets its own data
        // Each layer should have its data with prefixed aesthetic-named columns
        // Note: x and y are transformed to internal names pos1 and pos2
        let x_col = naming::aesthetic_column("pos1");
        let y_col = naming::aesthetic_column("pos2");
        let stroke_col = naming::aesthetic_column("stroke");
        // Check all four DRAW layers uniformly
        for layer_idx in 0..4 {
            let layer_key = naming::layer_key(layer_idx);
            assert!(
                prepared.data.contains_key(&layer_key),
                "Should have layer {} data",
                layer_idx
            );

            let layer_df = prepared.data.get(&layer_key).unwrap();
            let col_names = layer_df.get_column_names();

            // Each layer should have prefixed aesthetic-named columns
            assert!(
                col_names.iter().any(|c| c.as_str() == x_col),
                "Layer {} should have '{}' column: {:?}",
                layer_idx,
                x_col,
                col_names
            );
            assert!(
                col_names.iter().any(|c| c.as_str() == y_col),
                "Layer {} should have '{}' column: {:?}",
                layer_idx,
                y_col,
                col_names
            );
            // Stroke constant becomes a column named with prefixed aesthetic name
            assert!(
                col_names.iter().any(|c| c.as_str() == stroke_col),
                "Layer {} should have '{}' column: {:?}",
                layer_idx,
                stroke_col,
                col_names
            );
            // Facet aesthetic columns should be included (facet1 and facet2 for grid facet)
            // Note: row→facet1, column→facet2 after internal naming transformation
            let facet1_col = naming::aesthetic_column("facet1");
            let facet2_col = naming::aesthetic_column("facet2");
            assert!(
                col_names.iter().any(|c| c.as_str() == facet1_col),
                "Layer {} should have '{}' facet column: {:?}",
                layer_idx,
                facet1_col,
                col_names
            );
            assert!(
                col_names.iter().any(|c| c.as_str() == facet2_col),
                "Layer {} should have '{}' facet column: {:?}",
                layer_idx,
                facet2_col,
                col_names
            );
        }

        // Note: With the new aesthetic-named columns approach, each layer has its own data.
        // Faceting with multiple data sources requires query deduplication (Phase 7 of the plan).
        // For now, we verify that the data structure is correct.
        // Query deduplication will enable: identical layer queries → shared data → faceting works.

        // Verify the spec has the facet configuration
        assert!(
            prepared.specs[0].facet.is_some(),
            "Spec should have facet configuration"
        );
    }

    #[test]
    fn test_end_to_end_global_constant_in_visualise() {
        // Test that global constants in VISUALISE clause work correctly
        // e.g., VISUALISE date AS x, value AS y, 'value' AS stroke

        let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();

        // Create test data
        // (executed directly on the underlying DuckDB connection so the table
        // persists for the query below)
        reader
            .connection()
            .execute(
                "CREATE TABLE timeseries AS SELECT * FROM (VALUES
                    ('2023-01-01'::DATE, 100.0),
                    ('2023-01-08'::DATE, 110.0),
                    ('2023-01-15'::DATE, 105.0)
                ) AS t(date, value)",
                duckdb::params![],
            )
            .unwrap();

        // Query with global constant stroke in VISUALISE clause; the constant
        // applies to both DRAW layers below
        let query = r#"
            SELECT date, value FROM timeseries
            VISUALISE date AS x, value AS y, 'value' AS stroke
            DRAW line
            DRAW point SETTING size => 50
        "#;

        let prepared = execute::prepare_data_with_reader(query, &reader).unwrap();

        // Each layer should have a data_key
        let layer0_key = prepared.specs[0].layers[0]
            .data_key
            .as_ref()
            .expect("Layer 0 should have data_key");
        let _layer1_key = prepared.specs[0].layers[1]
            .data_key
            .as_ref()
            .expect("Layer 1 should have data_key");

        // Both layers have data (may be shared or separate depending on query dedup)
        // Verify layer 0 has the expected columns
        // Note: x and y are transformed to internal names pos1 and pos2
        let x_col = naming::aesthetic_column("pos1");
        let y_col = naming::aesthetic_column("pos2");
        let stroke_col = naming::aesthetic_column("stroke");

        let layer_df = prepared.data.get(layer0_key).unwrap();
        let col_names = layer_df.get_column_names();

        assert!(
            col_names.iter().any(|c| c.as_str() == x_col),
            "Should have '{}' column: {:?}",
            x_col,
            col_names
        );
        assert!(
            col_names.iter().any(|c| c.as_str() == y_col),
            "Should have '{}' column: {:?}",
            y_col,
            col_names
        );
        assert!(
            col_names.iter().any(|c| c.as_str() == stroke_col),
            "Should have '{}' column: {:?}",
            stroke_col,
            col_names
        );

        // Generate Vega-Lite and verify it works
        let writer = VegaLiteWriter::new();
        let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap();
        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        // Both layers should have stroke field-mapped to prefixed aesthetic-named column
        assert_eq!(vl_spec["layer"].as_array().unwrap().len(), 2);
        assert_eq!(
            vl_spec["layer"][0]["encoding"]["stroke"]["field"]
                .as_str()
                .unwrap(),
            stroke_col
        );
        assert_eq!(
            vl_spec["layer"][1]["encoding"]["stroke"]["field"]
                .as_str()
                .unwrap(),
            stroke_col
        );

        // With unified data approach, all data is in the data.values array
        // Verify the stroke value appears in the unified data
        let global_data = vl_spec["data"]["values"]
            .as_array()
            .expect("Should have unified global data");

        // Find rows belonging to layer 0 (filter by source)
        let layer0_rows: Vec<_> = global_data
            .iter()
            .filter(|r| r[naming::SOURCE_COLUMN] == layer0_key.as_str())
            .collect();
        assert!(!layer0_rows.is_empty(), "Should have layer data rows");
        // The global constant 'value' should appear in layer 0's stroke column
        assert_eq!(layer0_rows[0][&stroke_col], "value");
    }
}