sqlite-zstd 0.3.5

Extension for sqlite that provides transparent dictionary-based row-level compression for sqlite
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
use crate::{util::*, *};
use anyhow::Context as AContext;
use rusqlite::OptionalExtension;
use rusqlite::functions::Context;
use rusqlite::types::ToSqlOutput;
use rusqlite::types::Value;
use rusqlite::{named_params, params};
use std::collections::HashMap;
use std::time::{Duration, Instant};

// the output will be without magic header, without checksums, and without dictids. This will save 4 bytes when not using dictionaries and 8 bytes when using dictionaries.
// this also means the data will not be decodeable as a normal zstd archive with the standard tools
static COMPACT: bool = true;
#[derive(Debug)]
struct ColumnInfo {
    name: String,
    coltype: String,
    is_primary_key: bool,
    to_compress: bool,
    is_dict_id: bool,
}

fn def_min_dict_size() -> i64 {
    5000
}
fn def_dict_size_ratio() -> f32 {
    0.01
}
fn def_train_dict_samples_ratio() -> f32 {
    100.0
}
fn def_incremental_compression_step_bytes() -> i64 {
    // https://github.com/facebook/zstd/blob/dev/doc/images/CSpeed2.png
    // about 5MB/s at level 19
    5_000_000 / 3
}

/// This is the configuration of the transparent compression for one column of one table.
/// It is safe to change every property of this configuration at any time except for table and column, but data that is already compressed will not be recompressed with the new settings.
/// You can update the config e.g. using SQL: `update _zstd_configs set config = json_patch(config, '{"target_db_load": 1}');`
///
/// Note that the configuration is assumed to be trusted. For example, dict_chooser can probably used for SQL injection.
#[derive(serde::Serialize, serde::Deserialize)]
pub struct TransparentCompressConfig {
    /// the name of the table to which the transparent compression will be applied. It will be renamed to _tblname_zstd and replaced with a editable view.
    pub table: String,
    /// the name of the column
    pub column: String,
    /// The compression level. Valid levels are 1-19.
    /// Compression will be significantly slower when the level is increased, but decompression speed should stay about the same regardless of compression level.
    /// That means this is a tradeoff between zstd_incremental_maintenance vs SELECT performance.
    pub compression_level: i8,
    /// An SQL expression that chooses which dict to use or returns null if data should stay uncompressed for now
    /// Examples:
    ///
    /// * `'a'`
    ///   This will cause a single dictionary to be trained for everything.
    ///
    /// * `strftime(created, '%Y-%m')`
    ///   This will cause every month of data to be compressed with its own dictionary.
    ///
    /// * `nullif(strftime(created, '%Y-%m'), strftime('now', '%Y-%m'))`
    ///
    ///   The same as above, but if the entry is from the current month it will stay uncompressed.
    ///   This is handy because it means that the dictionary for the month will only be created when the month is over
    ///   and can thus be optimized the most for the given data
    /// * `case when date(timestamp, ''weekday 0'') < date(''now'', ''weekday 0'') then data_type || ''.'' || date(timestamp, ''weekday 0'') else null end`
    ///
    ///   This one uses keys like data_type.2020-11-01` where the date is the first day of the week, except for the current week which stays uncompressed.
    ///   This means that every different data_type will be compressed separately and separately for each week.
    ///
    /// You can return the special string `[nodict]` to compress the given data without a dictionary.
    /// Note that the compression key is global for all tables. So if you want your dict to only apply to this table return
    /// something like `"tablename." || strftime(...)`.
    pub dict_chooser: String,
    #[serde(default = "def_min_dict_size")]
    /// if dictionary size would be smaller than this then no dict will be trained and if no dict exists the data will stay uncompressed
    pub min_dict_size_bytes_for_training: i64,
    #[serde(default = "def_dict_size_ratio")]
    /// The target size of the dictionary based on seen data. For example,
    /// For example if we see 10MB of data for a specific group, the dict will target a size of ratio * 10MB (default 0.01)
    pub dict_size_ratio: f32,
    /// for training, we find samples of this factor (default 100)
    /// the default of 100 and 0.01 means that by default the dict will be trained on all of the data
    #[serde(default = "def_train_dict_samples_ratio")]
    pub train_dict_samples_ratio: f32,
    /// how many bytes (approximately) to compress at once. By default tuned so at compression level 19 it locks the database for about 0.3s per step.
    #[serde(default = "def_incremental_compression_step_bytes")]
    pub incremental_compression_step_bytes: i64,
}

pub fn pretty_bytes(bytes: i64) -> String {
    if bytes >= 1_000_000_000 {
        format!("{:.2}GB", bytes as f64 / 1e9)
    } else if bytes >= 1_000_000 {
        format!("{:.2}MB", bytes as f64 / 1e6)
    } else if bytes >= 1_000 {
        format!("{:.2}kB", bytes as f64 / 1e3)
    } else {
        format!("{bytes}B")
    }
}

#[derive(Debug)]
enum SqliteAffinity {
    Integer,
    Text,
    Blob,
    Real,
    Numeric,
}
/// determine affinity, algorithm described at https://www.sqlite.org/draft/datatype3.html#determination_of_column_affinity
fn get_column_affinity(declared_type: &str) -> SqliteAffinity {
    use SqliteAffinity::*;
    let typ = declared_type.to_ascii_lowercase();
    if typ.contains("int") {
        Integer
    } else if typ.contains("char") || typ.contains("clob") || typ.contains("text") {
        Text
    } else if typ.contains("blob") || typ.is_empty() {
        Blob
    } else if typ.contains("real") || typ.contains("floa") || typ.contains("doub") {
        Real
    } else {
        Numeric
    }
}

fn show_warnings(db: &Connection) -> anyhow::Result<()> {
    // warnings
    let journal_mode: String = db
        .query_row("pragma journal_mode;", params![], |r| r.get(0))
        .context("querying journal mode")?;
    if journal_mode != "wal" {
        log::warn!("Warning: It is recommended to set `pragma journal_mode=WAL;`");
    }
    let vacuum_mode: i32 = db
        .query_row("pragma auto_vacuum;", params![], |r| r.get(0))
        .context("querying vacuum mode")?;
    if vacuum_mode != 1 {
        log::warn!("Warning: It is recommended to set `pragma auto_vacuum=full;`");
    }
    let busy_timeout: i32 = db
        .query_row("pragma busy_timeout;", params![], |r| r.get(0))
        .context("querying busy timeout")?;
    if busy_timeout == 0 {
        log::warn!("Warning: It is recommended to set `pragma busy_timeout=2000;` or higher");
    }
    Ok(())
}
///
/// enables transparent row-level compression for a table with the following steps:
///
/// 1. renames tablename to _tablename_zstd if table is not already enabled
/// 2. creates a view called tablename that mirrors _tablename_zstd except it decompresses the compressed column on the fly
/// 3. creates INSERT, UPDATE and DELETE triggers on the view so they affect the backing table instead
///
/// Warning: this function assumes trusted input, it is not sql injection safe!
pub fn zstd_enable_transparent<'a>(ctx: &Context) -> anyhow::Result<ToSqlOutput<'a>> {
    let arg_config = 0;

    let config_str: String = ctx.get(arg_config)?;
    let config: TransparentCompressConfig = serde_json::from_str(&config_str)
        .with_context(|| format!("parsing json config '{config_str}'"))?;
    let db = &mut unsafe { ctx.get_connection()? };
    let db = db
        .unchecked_transaction()
        .context("Could not start transaction")?;
    let table_name = &config.table;
    let new_table_name = format!("_{table_name}_zstd");

    let configs = get_configs(&db)?;
    let already_compressed_columns = configs
        .iter()
        .filter(|c| &c.table == table_name)
        .map(|c| &c.column[..])
        .collect::<Vec<&str>>();

    log::debug!(
        "already compressed columns={:?}",
        already_compressed_columns
    );

    if already_compressed_columns.contains(&&config.column[..]) {
        anyhow::bail!(
            "Column {} is already enabled for compression.",
            &config.column
        );
    }

    let table_already_enabled = !already_compressed_columns.is_empty();

    let dict_id_columns: Vec<String> = if table_already_enabled {
        let query = format!(
            r#"select "from"
            from pragma_foreign_key_list('{}')
            where "table" = '_zstd_dicts'"#,
            &new_table_name
        );
        log::debug!("dict_id_columns query {:?}", query);
        db.prepare(&query)?
            .query_map(params![], |row| row.get("from"))
            .context("Could not get dicts ids info")?
            .collect::<Result<Vec<String>, _>>()?
    } else {
        vec![]
    };

    log::debug!("dict_id columns={:?}", dict_id_columns);

    if !check_table_exists(
        &db,
        if table_already_enabled {
            &new_table_name
        } else {
            table_name
        },
    ) {
        anyhow::bail!("Table {} doesn't exist", table_name);
    }

    let columns_info: Vec<ColumnInfo> = db
        .prepare(&format_sqlite!(
            r#"pragma table_info({})"#,
            if table_already_enabled {
                &new_table_name
            } else {
                table_name
            }
        ))?
        .query_map(params![], |row| {
            let col_name: String = row.get("name")?;
            let to_compress = (col_name == config.column)
                || (already_compressed_columns.contains(&&col_name[..]));
            let is_dict_id = dict_id_columns.contains(&col_name);
            Ok(ColumnInfo {
                name: col_name,
                is_primary_key: row.get("pk")?,
                coltype: row.get("type")?,
                to_compress,
                is_dict_id,
            })
        })
        .context("Could not query table_info")?
        .collect::<Result<_, rusqlite::Error>>()?;

    show_warnings(&db)?;

    // primary key columns. these will be used to index the table in the modifying triggers
    let primary_key_columns: Vec<&ColumnInfo> =
        columns_info.iter().filter(|e| e.is_primary_key).collect();

    if columns_info.is_empty() {
        anyhow::bail!("Table {} does not exist", table_name);
    }
    if primary_key_columns.is_empty() {
        anyhow::bail!(
            "Table {} does not have a primary key, sqlite-zstd only works on tables with primary keys, since rowids can change on VACUUM.",
            table_name
        );
    }

    let column_name = &config.column;

    let to_compress_column = columns_info
        .iter()
        .find(|e| &e.name == column_name)
        .with_context(|| format!("Column {column_name} does not exist in {table_name}"))?;
    if to_compress_column.is_primary_key {
        anyhow::bail!(
            "Can't compress column {} since it is part of primary key (this could probably be supported, but currently isn't)",
            column_name
        );
    }

    check_columns_to_compress_are_not_indexed(&db, &columns_info, table_name)?;

    let dict_id_column_name = get_dict_id(&to_compress_column.name);
    log::debug!("cols={:?}", columns_info);

    {
        let query = format!(
            "select ({}) as dict_chooser from {} limit 1",
            config.dict_chooser,
            escape_sqlite_identifier(table_name)
        );
        // small sanity check of chooser statement
        db.query_row(&query, params![], |row| row.get::<_, String>(0))
            .optional()
            .with_context(|| format!("Tried to execute:\n{query}"))
            .context(r#"Dict chooser expression does not seem to be valid. Make sure you return a string and get your escaping right: If you want an sqlite string inside a json string inside a sqlite string you need to do '{"foo": "''bar''"}'"#)?;
    }
    {
        // can't use prepared statement at these positions
        if !table_already_enabled {
            let rename_query =
                format_sqlite!("alter table {} rename to {}", table_name, &new_table_name);
            log::debug!("[run] {}", &rename_query);
            db.execute(&rename_query, params![])
                .context("Could not rename table")?;
        }

        util::ensure_dicts_table_exists(&db)?;

        db.execute(
            "
            create table if not exists _zstd_configs (
                id integer primary key autoincrement,
                config json not null
            );",
            params![],
        )
        .context("Could not create _zstd_configs")?;

        db.execute(
            "insert into _zstd_configs (config) values (?)",
            params![config_str],
        )
        .context("Could not insert config")?;

        db.execute(
            &format_sqlite!(
                "alter table {} add column {} integer default null references _zstd_dicts(id)",
                &new_table_name,
                &dict_id_column_name
            ),
            params![],
        )
        .context("Could not add dictid column")?;

        // this index is needed since the maintenance function queries by the dictionary id to find rows that are not compressed
        db.execute(
            &format_sqlite!(
                "create index {} on {} ({})",
                &format!("{}_idx", &dict_id_column_name),
                &new_table_name,
                &dict_id_column_name
            ),
            params![],
        )
        .context("Could not create index on dictid")?;
    }

    create_or_replace_view(
        &db,
        &columns_info,
        table_name,
        &new_table_name,
        table_already_enabled,
    )?;

    create_insert_trigger(&db, &columns_info, table_name, &new_table_name, &config)?;

    // a WHERE statement that selects a row based on the primary key
    let primary_key_condition = primary_key_columns
        .iter()
        .map(|c| format_sqlite!("old.{0} = {0}", &c.name))
        .collect::<Vec<String>>()
        .join(" and ");

    // add delete trigger
    create_delete_trigger(&db, table_name, &new_table_name, &primary_key_condition)?;

    // update trigger
    create_update_triggers(
        &db,
        &columns_info,
        table_name,
        &new_table_name,
        &primary_key_condition,
        &config,
    )?;

    db.commit().context("Could not commit transaction")?;
    Ok(ToSqlOutput::Owned(Value::Text("Done!".to_string())))
}

fn get_dict_id(column_name: &str) -> String {
    format!("_{column_name}_dict")
}

fn check_table_exists(db: &rusqlite::Connection, table_name: &str) -> bool {
    let table_count: u32 = db
        .query_row(
            "select count(`type`) from sqlite_master where name = ? and type = 'table'",
            params![table_name],
            |r| r.get(0),
        )
        .unwrap_or(0);
    table_count != 0
}

fn check_columns_to_compress_are_not_indexed(
    db: &rusqlite::Connection,
    columns_info: &[ColumnInfo],
    table_name: &str,
) -> anyhow::Result<()> {
    let indexed_columns: HashMap<String, String> = db
        .prepare(
            "
            select distinct ii.name as column_name, il.name as index_name
            from sqlite_master as m,
            pragma_index_list(m.name) as il,
            pragma_index_info(il.name) as ii
            where m.type='table' AND m.name=?",
        )?
        .query_map(params![table_name], |row| {
            Ok((row.get("column_name")?, row.get("index_name")?))
        })
        .context("could not get indices info")?
        .collect::<Result<_, rusqlite::Error>>()?;

    let indexed_columns_to_compress = columns_info
        .iter()
        .filter(|c| match indexed_columns.get(&c.name) {
            Some(_) => c.to_compress,
            None => false,
        })
        .collect::<Vec<&ColumnInfo>>();

    if !indexed_columns_to_compress.is_empty() {
        let columns_indices = indexed_columns_to_compress
            .iter()
            .map(|c| format!("{} ({})", c.name, indexed_columns.get(&c.name).unwrap()))
            .collect::<Vec<String>>()
            .join(", ");
        anyhow::bail!(
            "Can't compress column(s): {} - used as part of index (this could probably be supported, but currently isn't)",
            columns_indices
        );
    };
    Ok(())
}

fn create_or_replace_view(
    db: &rusqlite::Connection,
    columns_info: &[ColumnInfo],
    table_name: &str,
    internal_table_name: &str,
    table_already_enabled: bool,
) -> anyhow::Result<()> {
    if table_already_enabled {
        // this drops the existing triggers as well
        let dropview_query = format!(r#"drop view {}"#, escape_sqlite_identifier(table_name));
        log::debug!("[run] {}", &dropview_query);
        db.execute(&dropview_query, params![])
            .context("Could not drop view")?;
    }

    // create view
    let select_columns_escaped = columns_info
        .iter()
        .filter(|c| !c.is_dict_id )
        .map(|c| {
            if c.to_compress {
                let affinity_is_text = match get_column_affinity(&c.coltype) {
                    SqliteAffinity::Blob => false,
                    SqliteAffinity::Text => true,
                    other => anyhow::bail!("the to-compress column has type {} which has affinity {:?}, but affinity must be text or blob. See https://www.sqlite.org/draft/datatype3.html#determination_of_column_affinity", c.coltype, other)
                };
                Ok(format!(
                    // prepared statement parameters not allowed in view
                    "zstd_decompress_col({}, {}, {}, {}) as {0}",
                    &escape_sqlite_identifier(&c.name),
                    if affinity_is_text { 1 } else { 0 },
                    &escape_sqlite_identifier(&get_dict_id(&c.name)),
                    COMPACT
                ))
            } else {
                Ok(format_sqlite!("{}", &c.name))
            }
        })
        .collect::<Result<Vec<String>, _>>()
        .context("could not construct select in view")?
        .join(", ");
    let createview_query = format!(
        r#"
        create view {} as
            select {}
            from {}
        "#,
        escape_sqlite_identifier(table_name),
        select_columns_escaped,
        escape_sqlite_identifier(internal_table_name)
    );
    log::debug!("[run] {}", &createview_query);
    db.execute(&createview_query, params![])
        .context("Could not create view")?;
    Ok(())
}

fn create_insert_trigger(
    db: &rusqlite::Connection,
    columns_info: &[ColumnInfo],
    table_name: &str,
    internal_table_name: &str,
    _config: &TransparentCompressConfig,
) -> anyhow::Result<()> {
    let trigger_name = format!("{table_name}_insert_trigger");

    // expressions that map backing table columns to view columns
    let mut insert_selection = vec![];
    // names of the columns to be inserted
    let mut columns_selection = vec![];

    for c in columns_info {
        if c.is_dict_id {
            continue;
        }
        columns_selection.push(String::from(&c.name));
        if c.to_compress {
            let dict_id = get_dict_id(&c.name);
            insert_selection.push(format!(
                // prepared statement parameters not allowed in view
                "new.{col} as {col}, null as {dictcol}",
                col = escape_sqlite_identifier(&c.name),
                dictcol = escape_sqlite_identifier(&dict_id)
            ));
            columns_selection.push(String::from(&dict_id));
        } else {
            insert_selection.push(format_sqlite!("new.{}", &c.name));
        }
    }

    let createtrigger_query = format!(
        "
        create trigger {}
            instead of insert on {}
            for each row
            begin
                insert into {}({}) select {};
            end;
        ",
        escape_sqlite_identifier(&trigger_name),
        escape_sqlite_identifier(table_name),
        escape_sqlite_identifier(internal_table_name),
        columns_selection.join(", "),
        insert_selection.join(",\n"),
    );
    log::debug!("[run] {}", &createtrigger_query);
    db.execute(&createtrigger_query, params![])
        .context("Could not create insert trigger")?;
    Ok(())
}

fn create_delete_trigger(
    db: &rusqlite::Connection,
    table_name: &str,
    internal_table_name: &str,
    primary_key_condition: &str,
) -> anyhow::Result<()> {
    let trigger_name = format!("{table_name}_delete_trigger");

    let deletetrigger_query = format!(
        "
        create trigger {trg_name}
            instead of delete on {view}
            for each row
            begin
                delete from {backing_table} where {primary_key_condition};
            end;
        ",
        trg_name = escape_sqlite_identifier(&trigger_name),
        view = escape_sqlite_identifier(table_name),
        backing_table = escape_sqlite_identifier(internal_table_name),
        primary_key_condition = primary_key_condition
    );
    log::debug!("[run] {}", &deletetrigger_query);
    db.execute(&deletetrigger_query, params![])
        .context("could not create delete trigger")?;
    Ok(())
}

fn create_update_triggers(
    db: &rusqlite::Connection,
    columns_info: &[ColumnInfo],
    table_name: &str,
    internal_table_name: &str,
    primary_key_condition: &str,
    _config: &TransparentCompressConfig,
) -> anyhow::Result<()> {
    for col in columns_info {
        if col.is_dict_id {
            continue;
        }

        let trigger_name = format!("{}_update_{}_trigger", table_name, col.name);

        let update = if col.to_compress {
            format!(
                "{col} = new.{col}, {dictcol} = null",
                col = escape_sqlite_identifier(&col.name),
                dictcol = escape_sqlite_identifier(&get_dict_id(&col.name)),
            )
        } else {
            format_sqlite!("{} = new.{}", &col.name, &col.name)
        };
        // update triggers
        let updatetrigger_query = format!(
            "
            create trigger {trg_name}
                instead of update of {upd_col} on {view_name}
                for each row
                begin
                    update {backing_table} set {update} where {primary_key_condition};
                end;
            ",
            trg_name = escape_sqlite_identifier(&trigger_name),
            view_name = escape_sqlite_identifier(table_name),
            backing_table = escape_sqlite_identifier(internal_table_name),
            upd_col = escape_sqlite_identifier(&col.name),
            update = update,
            primary_key_condition = primary_key_condition
        );
        log::debug!("[run] {}", &updatetrigger_query);
        db.execute(&updatetrigger_query, params![])
            .with_context(|| format!("Could not create update of {} trigger", col.name))?;
    }
    Ok(())
}

fn get_configs(db: &rusqlite::Connection) -> Result<Vec<TransparentCompressConfig>, anyhow::Error> {
    // if the table `_zstd_configs` does not exist yet, transparent compression hasn't been used yet, so return an empty array
    if !check_table_exists(db, "_zstd_configs") {
        return Ok(vec![]);
    }

    let configs = db
        .prepare("select config from _zstd_configs")?
        .query_map(params![], |row| {
            serde_json::from_str(row.get_ref_unwrap("config").as_str()?)
                .context("parsing config")
                .map_err(ah)
        })
        .context("Couldn't fetch configs")?
        .collect::<Result<Vec<TransparentCompressConfig>, rusqlite::Error>>()?;
    Ok(configs)
}

#[derive(Debug)]
struct TodoInfo {
    dict_choice: Option<String>,
    count: i64,
    total_bytes: i64,
}

struct IncrementalMaintenanceArgs {
    end_limit: Instant,
    target_db_load: f32,
    time_limit: f64,
}
pub fn zstd_incremental_maintenance<'a>(ctx: &Context) -> Result<ToSqlOutput<'a>, anyhow::Error> {
    let args = {
        let arg_time_limit_seconds = 0;
        let arg_target_db_load = 1;
        let time_limit: Option<f64> = ctx
            .get(arg_time_limit_seconds)
            .context("could not get time limit argument")?;
        let time_limit = time_limit.unwrap_or(100000000.0);
        let target_db_load: f32 = ctx
            .get(arg_target_db_load)
            .context("could not get target db load argument")?;
        if !(0.0..=1e100).contains(&time_limit) {
            anyhow::bail!("time too large");
        }
        let end_limit = Instant::now() + Duration::from_secs_f64(time_limit);
        IncrementalMaintenanceArgs {
            end_limit,
            target_db_load,
            time_limit,
        }
    };
    let db = unsafe { ctx.get_connection()? };
    show_warnings(&db)?;
    let configs = get_configs(&db)?;

    for config in configs {
        match maintenance_for_config(&db, config, &args)? {
            MaintRet::TimeLimitReached => {
                log::info!(
                    "time limit of {:.1}s reached, stopping with more maintenance work pending",
                    args.time_limit
                );
                return Ok(1.into());
            }
            MaintRet::Completed => {}
        }
    }
    log::info!("All maintenance work completed!");
    Ok(0.into())
}

enum MaintRet {
    TimeLimitReached,
    Completed,
}

struct EscapedNames {
    compressed_tablename: String,
    data_colname: String,
    dict_colname: String,
}
impl From<&TransparentCompressConfig> for EscapedNames {
    fn from(config: &TransparentCompressConfig) -> EscapedNames {
        EscapedNames {
            compressed_tablename: escape_sqlite_identifier(&format!("_{}_zstd", config.table)),
            data_colname: escape_sqlite_identifier(&config.column),
            dict_colname: escape_sqlite_identifier(&format!("_{}_dict", config.column)),
        }
    }
}

fn maintenance_for_config(
    db: &Connection,
    config: TransparentCompressConfig,
    args: &IncrementalMaintenanceArgs,
) -> anyhow::Result<MaintRet> {
    let esc_names = EscapedNames::from(&config);

    let todos = db
        .prepare(&format!(
            "select
            ({chooser}) as dict_choice,
            count(*) as count,
            sum(length({datacol})) as total_bytes
        from {tbl} where {dictcol} is null group by dict_choice",
            tbl = esc_names.compressed_tablename,
            dictcol = esc_names.dict_colname,
            datacol = esc_names.data_colname,
            chooser = config.dict_chooser
        ))?
        .query_map(params![], |row| {
            Ok(TodoInfo {
                dict_choice: row.get("dict_choice")?,
                count: row.get("count")?,
                total_bytes: row.get("total_bytes")?,
            })
        })?
        .collect::<Result<Vec<_>, _>>()?;

    let total_bytes_to_compress: i64 = todos
        .iter()
        .filter(|e| e.dict_choice.is_some())
        .map(|e| e.total_bytes)
        .sum();
    let mut rows_compressed_so_far: i64 = 0;
    let mut bytes_compressed_so_far: i64 = 0;
    let total_rows_to_compress: i64 = todos
        .iter()
        .filter(|e| e.dict_choice.is_some())
        .map(|e| e.count)
        .sum();
    log::info!(
        "{}.{}: Total {} rows ({}) to potentially compress (split in {} groups).",
        config.table,
        config.column,
        total_rows_to_compress,
        pretty_bytes(total_bytes_to_compress),
        todos.len()
    );
    for todo in todos.into_iter() {
        let rows_handled = maintenance_for_todo(db, &config, &todo, &esc_names, args)?;
        rows_compressed_so_far += rows_handled;
        // estimate bytes compressed
        bytes_compressed_so_far +=
            ((rows_handled as f64 / todo.count as f64) * todo.total_bytes as f64) as i64;
        if rows_handled > 0 {
            log::info!(
                "Handled {} / {} rows  ({} / {})",
                rows_compressed_so_far,
                total_rows_to_compress,
                pretty_bytes(bytes_compressed_so_far),
                pretty_bytes(total_bytes_to_compress)
            );
        }
        if Instant::now() > args.end_limit {
            return Ok(MaintRet::TimeLimitReached);
        }
    }
    Ok(MaintRet::Completed)
}

fn maintenance_for_todo(
    db: &Connection,
    config: &TransparentCompressConfig,
    todo: &TodoInfo,
    esc_names: &EscapedNames,
    args: &IncrementalMaintenanceArgs,
) -> anyhow::Result<i64> {
    let avg_sample_bytes = todo.total_bytes / todo.count;
    let dict_choice = todo.dict_choice.as_deref().unwrap_or("[null]");
    log::debug!(
        "looking at group={}, has {} rows with {} average size ({} total)",
        dict_choice,
        todo.count,
        pretty_bytes(avg_sample_bytes),
        pretty_bytes(todo.total_bytes)
    );

    let (dict_id, dict_is_new) =
        match get_or_train_dict(db, config, todo, esc_names).context("getting dict")? {
            TrainDictReturn::Skip => return Ok(0),
            TrainDictReturn::Done {
                dict_id,
                dict_is_new,
            } => (dict_id, dict_is_new),
        };

    let mut total_updated: i64 = 0;
    let mut chunk_size = config.incremental_compression_step_bytes / avg_sample_bytes;
    if chunk_size < 1 {
        chunk_size = 1;
    }
    log::debug!(
        "Compressing {} samples with key {} and level {}, chunksize {}",
        todo.count,
        dict_choice,
        config.compression_level,
        chunk_size
    );
    loop {
        let update_start = Instant::now();
        let q = &format!(
            "update {tbl} set {datacol} = zstd_compress_col({datacol}, :lvl, :dict, :compact), {dictcol} = :dict where rowid in (select rowid from {tbl} where {dictcol} is null and :dictchoice = ({chooser}) limit :chunksize)",
            tbl = esc_names.compressed_tablename,
            datacol = esc_names.data_colname,
            dictcol = esc_names.dict_colname,
            chooser = config.dict_chooser
        );
        log::trace!("executing {}", q);
        let updated = db
            .execute(
                q,
                named_params! {
                    ":lvl": config.compression_level,
                    ":dict": dict_id,
                    ":dictchoice": &dict_choice,
                    ":chunksize": chunk_size,
                    ":compact": COMPACT
                },
            )
            .with_context(|| format!("while compressing chunk for key {dict_choice}"))?;

        total_updated += updated as i64;
        log::debug!("Compressed {} / {}", total_updated, todo.count);
        if Instant::now() > args.end_limit {
            break;
        }
        let elapsed = update_start.elapsed();
        if elapsed.div_f32(args.target_db_load) > elapsed {
            let sleep_duration = elapsed.div_f32(args.target_db_load) - elapsed;
            if sleep_duration > Duration::from_millis(1) {
                log::debug!(
                    "Sleeping {}s to keep write load at {}",
                    sleep_duration.as_secs_f32(),
                    args.target_db_load
                );
                std::thread::sleep(sleep_duration);
            }
        }

        if updated == 0 {
            break;
        }
    }

    let (total_size_after, total_count_after): (i64, i64) = db.query_row(
        &format!(
            "select sum(length({datacol})), count(*) from {tbl} where {dictcol} = ?",
            tbl = esc_names.compressed_tablename,
            datacol = esc_names.data_colname,
            dictcol = esc_names.dict_colname
        ),
        params![dict_id],
        |row| Ok((row.get(0)?, row.get(1)?)),
    )?;
    if dict_is_new {
        log::info!(
            "Compressed {} rows with dict_choice={} (dict_id={}). Total size of entries before: {}, afterwards: {}, (average: before={}, after={})",
            total_updated,
            dict_choice,
            dict_id,
            pretty_bytes(todo.total_bytes),
            pretty_bytes(total_size_after),
            pretty_bytes(avg_sample_bytes),
            pretty_bytes(total_size_after / total_count_after),
        );
    }
    Ok(total_updated)
}

enum TrainDictReturn {
    Skip,
    Done { dict_id: i32, dict_is_new: bool },
}
fn get_or_train_dict(
    db: &Connection,
    config: &TransparentCompressConfig,
    todo: &TodoInfo,
    esc_names: &EscapedNames,
) -> anyhow::Result<TrainDictReturn> {
    let dict_choice = match &todo.dict_choice {
        None => {
            log::debug!("Skipping group, no dict chosen");
            return Ok(TrainDictReturn::Skip);
        }
        Some(e) => e,
    };
    if dict_choice == "[nodict]" {
        return Ok(TrainDictReturn::Done {
            dict_id: -1,
            dict_is_new: false,
        });
    }

    let avg_sample_bytes = todo.total_bytes / todo.count;
    let dict_id: Option<i32> = db
        .query_row(
            "select id from _zstd_dicts where chooser_key = ?",
            params![dict_choice],
            |row| row.get("id"),
        )
        .optional()?;
    Ok(match dict_id {
        Some(dict_id) => {
            log::debug!(
                "Found existing dictionary id={} for key={}",
                dict_id,
                dict_choice
            );
            TrainDictReturn::Done {
                dict_id,
                dict_is_new: false,
            }
        }
        None => {
            let dict_target_size = (todo.total_bytes as f32 * config.dict_size_ratio) as i64;

            if dict_target_size < config.min_dict_size_bytes_for_training {
                log::debug!(
                    "Dictionary for group '{}' would be smaller than minimum ({} * {:.3} = {} < {}), ignoring",
                    dict_choice,
                    pretty_bytes(todo.total_bytes),
                    config.dict_size_ratio,
                    pretty_bytes(dict_target_size),
                    pretty_bytes(config.min_dict_size_bytes_for_training)
                );
                return Ok(TrainDictReturn::Skip);
            }
            let target_samples = (dict_target_size as f32 * config.train_dict_samples_ratio
                / avg_sample_bytes as f32) as i64; // use roughly 100x the size of the dictionary as data

            log::debug!(
                "Training dict for key {} of max size {}",
                dict_choice,
                pretty_bytes(dict_target_size)
            );
            let dict_id = db.query_row(&format!(
                "select zstd_train_dict_and_save({datacol}, ?, ?, ?) as dictid from {tbl} where {dictcol} is null and ? = ({chooser})", 
                datacol=esc_names.data_colname,
                tbl=esc_names.compressed_tablename,
                dictcol=esc_names.dict_colname,
                chooser=config.dict_chooser
            ), params![dict_target_size, target_samples, dict_choice, dict_choice], |row| row.get("dictid"))?;
            TrainDictReturn::Done {
                dict_id,
                dict_is_new: true,
            }
        }
    })
}
#[cfg(test)]
mod tests {
    use super::add_functions::tests::create_example_db;
    use super::*;
    use pretty_assertions::assert_eq;
    use rand::prelude::SliceRandom;
    use rusqlite::params;
    use rusqlite::{Connection, Row};

    fn row_to_thong(r: &Row) -> anyhow::Result<Vec<Value>> {
        Ok((0..r.as_ref().column_count())
            .map(|i| r.get_ref(i).map(|e| e.into()))
            .collect::<Result<_, _>>()?)
    }

    fn get_whole_table(db: &Connection, tbl_name: &str) -> anyhow::Result<Vec<Vec<Value>>> {
        let mut stmt = db.prepare(&format!("select * from {tbl_name} ORDER BY id"))?;
        let q1: Vec<Vec<Value>> = stmt
            .query_map(params![], |e| row_to_thong(e).map_err(ah))?
            .collect::<Result<_, rusqlite::Error>>()?;
        Ok(q1)
    }

    fn check_table_rows_same(db1: &Connection, db2: &Connection) -> anyhow::Result<()> {
        let tbl1 = get_whole_table(db1, "events").context("Could not get whole table db 1")?;
        let tbl2 = get_whole_table(db2, "events").context("Could not get whole table db 2")?;
        assert_eq!(tbl1, tbl2);

        Ok(())
    }

    #[test]
    fn sanity() -> anyhow::Result<()> {
        let db1 = create_example_db(Some(123), 100)?;
        let db2 = create_example_db(Some(123), 100)?;

        check_table_rows_same(&db1, &db2)?;

        Ok(())
    }

    #[test]
    fn no_configs() -> anyhow::Result<()> {
        let db = create_example_db(Some(123), 100)?;

        assert_eq!(get_configs(&db)?.len(), 0);
        Ok(())
    }

    fn get_two_dbs(seed: Option<u64>) -> anyhow::Result<(Connection, Connection)> {
        if std::env::var("RUST_LOG").is_err() {
            // TODO: Audit that the environment access only happens in single-threaded code.
            unsafe { std::env::set_var("RUST_LOG", "info") };
        }
        env_logger::try_init().ok();

        let db1 = create_example_db(seed, 2000)?;
        let db2 = create_example_db(seed, 2000)?;

        db2.query_row(
            r#"select zstd_enable_transparent(?)"#,
            params![r#"{"table": "events", "column": "data", "compression_level": 3, "dict_chooser": "'1'"}"#],
            |_| Ok(())
        ).context("enable transparent")?;

        Ok((db1, db2))
    }
    #[test]
    fn enable_transparent() -> anyhow::Result<()> {
        let (db1, db2) = get_two_dbs(Some(123))?;
        check_table_rows_same(&db1, &db2)?;

        Ok(())
    }

    fn get_rand_id(db: &Connection) -> anyhow::Result<i64> {
        db.query_row(
            "select id from events order by random() limit 1",
            params![],
            |r| r.get(0),
        )
        .context("Could not get random id")
    }

    fn insert(db: &Connection, _id: i64, _id2: i64) -> anyhow::Result<()> {
        let query = r#"insert into events (timestamp, data) values ('2020-12-20T00:00:00Z', '{"foo": "bar"}')"#;

        db.execute(query, params![])?;

        Ok(())
    }

    fn insert_both_columns(db: &Connection, _id: i64, _id2: i64) -> anyhow::Result<()> {
        let query = r#"insert into events (timestamp, data, another_col) values ('2020-12-20T00:00:00Z', '{"foo": "bar"}', 'rustacean')"#;

        db.execute(query, params![])?;

        Ok(())
    }

    fn update_comp_col(db: &Connection, id: i64, _id2: i64) -> anyhow::Result<()> {
        let _updc = db.execute("update events set data='fooooooooooooooooooooooooooooooooooooooooooooobar' where id = ?", params![id]).context("updating compressed column")?;

        //assert_eq!(updc, 1);
        Ok(())
    }

    fn update_other_col(db: &Connection, id: i64, _id2: i64) -> anyhow::Result<()> {
        let _updc = db
            .execute(
                "update events set timestamp = '2020-02-01' where id = ?",
                params![id],
            )
            .context("updating other column")?;
        //assert_eq!(updc, 1);
        Ok(())
    }

    fn update_other_two_col(db: &Connection, id: i64, id2: i64) -> anyhow::Result<()> {
        //thread::rand
        delete_one(db, id2, id)?;
        let _updc = db
            .execute(
                "update events set timestamp = '2020-02-01', id=? where id = ?",
                params![id2, id],
            )
            .context("updating other two column")?;
        //assert_eq!(updc, 1);
        Ok(())
    }

    fn update_comp_col_and_other_two_col(db: &Connection, id: i64, id2: i64) -> anyhow::Result<()> {
        //thread::rand
        delete_one(db, id2, id)?;
        let _updc = db.execute("update events set timestamp = '2020-02-01', id=?, data='fooooooooooooooooooooooooooooooooooooooooooooobar' where id = ?", params![id2,id]).context("updating three column")?;
        //assert_eq!(updc, 1);
        Ok(())
    }

    fn update_two_rows(db: &Connection, id: i64, id2: i64) -> anyhow::Result<()> {
        //thread::rand
        let _updc = db.execute("update events set timestamp = '2020-02-01', data='fooooooooooooooooooooooooooooooooooooooooooooobar' where id in (:a, :b)", 
        named_params! {":a": id, ":b": id2}).context("updating two rows")?;
        //assert_eq!(updc, 2);
        Ok(())
    }

    fn update_two_rows_by_compressed(db: &Connection, id: i64, id2: i64) -> anyhow::Result<()> {
        let _updc = db
            .execute(
                "update events set data = 'testingxy' where id in (?, ?)",
                params![id, id2],
            )
            .context("updating two rows replace compressed")?;
        //assert_eq!(updc, 2);
        //thread::rand
        let _updc = db
            .execute(
                "update events set timestamp='1234' where data = 'testingxy'",
                params![],
            )
            .context("updating where compressed=...")?;
        //assert_eq!(updc, 2);
        Ok(())
    }

    fn delete_one(db: &Connection, id: i64, _id2: i64) -> anyhow::Result<()> {
        let _updc = db
            .execute("delete from events where id = ?", params![id])
            .context("deleting from events by id")?;
        //assert_eq!(updc, 1);
        Ok(())
    }

    fn delete_where_other(db: &Connection, id: i64, _id2: i64) -> anyhow::Result<()> {
        let ts: String = db.query_row(
            "select timestamp from events where id = ?",
            params![id],
            |r| r.get(0),
        )?;
        let _updc = db
            .execute("delete from events where timestamp = ?", params![ts])
            .context("deleting by timestamp")?;
        //assert_eq!(updc, 1);
        Ok(())
    }

    #[test]
    fn test_many() -> anyhow::Result<()> {
        type Executor = dyn Fn(&Connection, i64, i64) -> anyhow::Result<()>;
        let posses: Vec<&Executor> = vec![
            &insert,
            &update_comp_col,
            &update_other_col,
            &update_other_two_col,
            &update_comp_col_and_other_two_col,
            &update_two_rows,
            &update_two_rows_by_compressed,
            &delete_one,
            &delete_where_other,
        ];

        let mut posses2 = vec![];
        for _ in 0..100 {
            posses2.push(*posses.choose(&mut rand::thread_rng()).unwrap());
        }
        for compress_first in [false, true] {
            for operations in &[&posses2] {
                if compress_first {
                    let (db1, db2) =
                        get_two_dbs(Some(123)).context("Could not create databases")?;
                    if compress_first {
                        let done: i64 = db2.query_row(
                            "select zstd_incremental_maintenance(9999999, 1)",
                            params![],
                            |r| r.get(0),
                        )?;

                        assert_eq!(done, 0);

                        let uncompressed_count: i64 = db2
                            .query_row(
                                "select count(*) from _events_zstd where _data_dict is null",
                                params![],
                                |r| r.get(0),
                            )
                            .context("Could not query uncompressed count")?;
                        assert_eq!(uncompressed_count, 0);
                    }

                    for operation in *operations {
                        let id = get_rand_id(&db1)?;
                        let id2 = get_rand_id(&db2)?;
                        operation(&db1, id, id2)
                            .context("Could not run operation on uncompressed db")?;
                        operation(&db2, id, id2)
                            .context("Could not run operation on compressed db")?;
                    }

                    check_table_rows_same(&db1, &db2)?;
                }
            }
        }

        Ok(())
    }

    #[test]
    fn columns_of_the_same_table_are_enabled() -> anyhow::Result<()> {
        let (db1, db2) = get_two_dbs(Some(456)).context("Could not create databases")?;
        db2.query_row(
            r#"select zstd_enable_transparent(?)"#,
            params![r#"{"table": "events", "column": "another_col", "compression_level": 3, "dict_chooser": "'1'"}"#],
            |_| Ok(())
        ).context("enable transparent")?;

        let done: i64 = db2.query_row(
            "select zstd_incremental_maintenance(9999999, 1)",
            params![],
            |r| r.get(0),
        )?;

        assert_eq!(done, 0);

        let uncompressed_count: i64 = db2
            .query_row(
                "select count(*) from _events_zstd where _data_dict is null",
                params![],
                |r| r.get(0),
            )
            .context("Could not query uncompressed count")?;
        assert_eq!(uncompressed_count, 0);

        let id = get_rand_id(&db1)?;
        let id2 = get_rand_id(&db2)?;
        insert_both_columns(&db1, id, id2).context("Could not run operation on uncompressed db")?;
        insert_both_columns(&db2, id, id2).context("Could not run operation on compressed db")?;

        check_table_rows_same(&db1, &db2)?;

        Ok(())
    }

    #[test]
    #[should_panic(expected = "another_col (another_col_idx) - used as part of index")]
    fn indexed_column_cannot_be_enabled() {
        let db = create_example_db(None, 1100).unwrap();

        // When column of original table is indexed
        db.execute(
            "create index another_col_idx on events (another_col)",
            params![],
        )
        .unwrap();

        db.query_row(
            r#"select zstd_enable_transparent(?)"#,
            params![r#"{"table": "events", "column": "another_col", "compression_level": 3, "dict_chooser": "'1'"}"#],
            |_| Ok(())
        ).unwrap();
    }

    #[test]
    #[should_panic(expected = "another_col is already enabled for compression")]
    fn same_column_is_not_allowed_to_be_enabled_multiple_times() {
        let db = create_example_db(None, 1100).unwrap();

        db.query_row(
            r#"select zstd_enable_transparent(?)"#,
            params![r#"{"table": "events", "column": "another_col", "compression_level": 3, "dict_chooser": "'1'"}"#],
            |_| Ok(())
        ).unwrap();

        db.query_row(
            r#"select zstd_enable_transparent(?)"#,
            params![r#"{"table": "events", "column": "another_col", "compression_level": 3, "dict_chooser": "'1'"}"#],
            |_| Ok(())
        ).unwrap();
    }
}