datafusion_table_providers/duckdb/
settings.rs

1//! # DuckDB Settings System
2//!
3//! This module provides a flexible, trait-based system for managing DuckDB database settings.
4//! Library users can easily extend the system with their own custom settings without modifying
5//! this crate.
6//!
7//! ## Basic Usage
8//!
9//! ```rust,no_run
10//! use datafusion_table_providers::duckdb::{DuckDBSettingsRegistry, MemoryLimitSetting};
11//! use std::collections::HashMap;
12//!
13//! // Create a registry with default settings
14//! let mut registry = DuckDBSettingsRegistry::new();
15//!
16//! // Or create an empty registry and add settings manually
17//! let mut empty_registry = DuckDBSettingsRegistry::empty();
18//! empty_registry.register(Box::new(MemoryLimitSetting));
19//!
20//! // Use with options
21//! let mut options = HashMap::new();
22//! options.insert("memory_limit".to_string(), "2GB".to_string());
23//!
24//! // The registry will automatically apply settings when used with DuckDBTableProviderFactory
25//! ```
26//!
27//! ## Creating Custom Settings
28//!
29//! Library users can create their own settings by implementing the `DuckDBSetting` trait:
30//!
31//! ```rust,no_run
32//! use datafusion_table_providers::duckdb::{DuckDBSetting, DuckDBSettingScope};
33//! use datafusion_table_providers::duckdb::Error;
34//! use std::collections::HashMap;
35//!
36//! #[derive(Debug)]
37//! struct CustomTimeoutSetting;
38//!
39//! impl DuckDBSetting for CustomTimeoutSetting {
40//!     fn as_any(&self) -> &dyn std::any::Any {
41//!         self
42//!     }
43//!
44//!     fn setting_name(&self) -> &'static str {
45//!         "query_timeout"
46//!     }
47//!
48//!     fn get_value(&self, options: &HashMap<String, String>) -> Option<String> {
49//!         options.get("query_timeout").cloned()
50//!     }
51//!
52//!     fn scope(&self) -> DuckDBSettingScope {
53//!         DuckDBSettingScope::Global
54//!     }
55//!
56//!     fn validate(&self, value: &str) -> Result<(), Error> {
57//!         // Validate that the timeout is a valid number
58//!         value.parse::<u64>().map_err(|_| Error::DbConnectionError {
59//!             source: format!("Invalid timeout value: {}", value).into(),
60//!         })?;
61//!         Ok(())
62//!     }
63//!
64//!     fn format_sql_value(&self, value: &str) -> String {
65//!         // No quotes needed for numeric values
66//!         value.to_string()
67//!     }
68//! }
69//! ```
70//!
71//! ## Unconditional Settings
72//!
73//! You can create settings that always apply regardless of the options:
74//!
75//! ```rust,no_run
76//! use datafusion_table_providers::duckdb::{DuckDBSetting, DuckDBSettingScope};
77//! use std::collections::HashMap;
78//!
79//! #[derive(Debug)]
80//! struct AlwaysEnabledFeature;
81//!
82//! impl DuckDBSetting for AlwaysEnabledFeature {
83//!     fn as_any(&self) -> &dyn std::any::Any { self }
84//!     
85//!     fn setting_name(&self) -> &'static str {
86//!         "enable_feature"
87//!     }
88//!
89//!     fn scope(&self) -> DuckDBSettingScope {
90//!         DuckDBSettingScope::Global
91//!     }
92//!
93//!     fn get_value(&self, _options: &HashMap<String, String>) -> Option<String> {
94//!         // Always return a value, regardless of options
95//!         Some("true".to_string())
96//!     }
97//! }
98//! ```
99
100use crate::sql::db_connection_pool::dbconnection::duckdbconn::DuckDBSyncParameter;
101use crate::{duckdb::Error, sql::db_connection_pool::dbconnection::SyncDbConnection};
102use datafusion::error::{DataFusionError, Result as DataFusionResult};
103use duckdb::DuckdbConnectionManager;
104use r2d2::PooledConnection;
105use snafu::prelude::*;
106use std::collections::HashMap;
107use std::sync::Arc;
108
/// Scope in which a DuckDB setting is applied.
///
/// The registry only considers settings whose [`DuckDBSetting::scope`] equals the scope
/// it was asked to process.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DuckDBSettingScope {
    /// The setting is applied once, to the DuckDB instance as a whole.
    Global,
    /// The setting is applied to each individual connection.
    Local,
}
117
118/// Trait for DuckDB settings that can be applied to a connection
119pub trait DuckDBSetting: Send + Sync + std::fmt::Debug {
120    fn as_any(&self) -> &dyn std::any::Any;
121
122    /// The name of the DuckDB setting, i.e. `SET <setting_name> = <value>`
123    fn setting_name(&self) -> &'static str;
124
125    /// The scope of the DuckDB setting, see [DuckDB documentation](https://duckdb.org/docs/stable/sql/statements/set.html#scopes)
126    fn scope(&self) -> DuckDBSettingScope;
127
128    /// Get the value for this setting from the options, if available
129    fn get_value(&self, options: &HashMap<String, String>) -> Option<String>;
130
131    /// Validate the value before applying it
132    fn validate(&self, _value: &str) -> Result<(), Error> {
133        Ok(())
134    }
135
136    /// Format the value for use in SQL (e.g., add quotes for strings)
137    fn format_sql_value(&self, value: &str) -> String {
138        value.to_string()
139    }
140}
141
142/// Registry for managing DuckDB settings
143#[derive(Debug)]
144pub struct DuckDBSettingsRegistry {
145    settings: Vec<Box<dyn DuckDBSetting>>,
146}
147
148impl Default for DuckDBSettingsRegistry {
149    fn default() -> Self {
150        Self::new()
151    }
152}
153
154impl DuckDBSettingsRegistry {
155    /// Create a new registry with default settings
156    pub fn new() -> Self {
157        let mut registry = Self {
158            settings: Vec::new(),
159        };
160
161        // Register default settings
162        registry.register(Box::new(MemoryLimitSetting));
163        registry.register(Box::new(TempDirectorySetting));
164        registry.register(Box::new(PreserveInsertionOrderSetting));
165
166        registry
167    }
168
169    /// Create an empty registry without default settings
170    pub fn empty() -> Self {
171        Self {
172            settings: Vec::new(),
173        }
174    }
175
176    pub fn with_setting(mut self, setting: Box<dyn DuckDBSetting>) -> Self {
177        self.register(setting);
178        self
179    }
180
181    /// Register a new setting
182    pub fn register(&mut self, setting: Box<dyn DuckDBSetting>) {
183        self.settings.push(setting);
184    }
185
186    /// Apply all applicable settings to the connection pool
187    pub fn apply_settings(
188        &self,
189        conn: &dyn SyncDbConnection<
190            PooledConnection<DuckdbConnectionManager>,
191            Box<dyn DuckDBSyncParameter>,
192        >,
193        options: &HashMap<String, String>,
194        scope: DuckDBSettingScope,
195    ) -> DataFusionResult<()> {
196        for setting in &self.settings {
197            if setting.scope() != scope {
198                tracing::debug!(
199                    "Skipping setting {} because it's not a {scope:?}",
200                    setting.setting_name(),
201                );
202                continue;
203            }
204
205            if let Some(value) = setting.get_value(options) {
206                setting
207                    .validate(&value)
208                    .map_err(|e| DataFusionError::External(Box::new(e)))?;
209
210                let set_statement = self.set_statement(setting.as_ref(), &value);
211                tracing::debug!("DuckDB: {}", set_statement);
212                conn.execute(&set_statement, &[])?;
213            }
214        }
215
216        Ok(())
217    }
218
219    /// Returns a list of DuckDB SET statements for the given settings and options
220    pub fn get_setting_statements(
221        &self,
222        options: &HashMap<String, String>,
223        scope: DuckDBSettingScope,
224    ) -> Vec<Arc<str>> {
225        self.settings
226            .iter()
227            .filter(|s| s.scope() == scope)
228            .filter_map(|s| {
229                s.get_value(options)
230                    .map(|value| self.set_statement(s.as_ref(), &value))
231            })
232            .map(|s| s.into())
233            .collect()
234    }
235
236    fn set_statement(&self, setting: &dyn DuckDBSetting, value: &str) -> String {
237        format!(
238            "SET {} = {}",
239            setting.setting_name(),
240            setting.format_sql_value(value)
241        )
242    }
243}
244
245/// Memory limit setting implementation
246#[derive(Debug)]
247pub struct MemoryLimitSetting;
248
249impl DuckDBSetting for MemoryLimitSetting {
250    fn as_any(&self) -> &dyn std::any::Any {
251        self
252    }
253
254    fn setting_name(&self) -> &'static str {
255        "memory_limit"
256    }
257
258    fn scope(&self) -> DuckDBSettingScope {
259        DuckDBSettingScope::Global
260    }
261
262    fn get_value(&self, options: &HashMap<String, String>) -> Option<String> {
263        options.get("memory_limit").cloned()
264    }
265
266    fn validate(&self, value: &str) -> Result<(), Error> {
267        byte_unit::Byte::parse_str(value, true).context(
268            crate::duckdb::UnableToParseMemoryLimitSnafu {
269                value: value.to_string(),
270            },
271        )?;
272        Ok(())
273    }
274
275    fn format_sql_value(&self, value: &str) -> String {
276        format!("'{}'", value)
277    }
278}
279
280/// Temp directory setting implementation
281#[derive(Debug)]
282pub struct TempDirectorySetting;
283
284impl DuckDBSetting for TempDirectorySetting {
285    fn as_any(&self) -> &dyn std::any::Any {
286        self
287    }
288
289    fn setting_name(&self) -> &'static str {
290        "temp_directory"
291    }
292
293    fn scope(&self) -> DuckDBSettingScope {
294        DuckDBSettingScope::Global
295    }
296
297    fn get_value(&self, options: &HashMap<String, String>) -> Option<String> {
298        options.get("temp_directory").cloned()
299    }
300
301    fn format_sql_value(&self, value: &str) -> String {
302        format!("'{}'", value)
303    }
304}
305
306/// Preserve insertion order setting implementation
307#[derive(Debug)]
308pub struct PreserveInsertionOrderSetting;
309
310impl DuckDBSetting for PreserveInsertionOrderSetting {
311    fn as_any(&self) -> &dyn std::any::Any {
312        self
313    }
314
315    fn setting_name(&self) -> &'static str {
316        "preserve_insertion_order"
317    }
318
319    fn scope(&self) -> DuckDBSettingScope {
320        DuckDBSettingScope::Global
321    }
322
323    fn get_value(&self, options: &HashMap<String, String>) -> Option<String> {
324        options.get("preserve_insertion_order").cloned()
325    }
326}
327
/// Unit tests for the built-in settings, the trait defaults and the registry bookkeeping.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Builds an options map holding exactly one `key` -> `value` entry.
    fn single_option(key: &str, value: &str) -> HashMap<String, String> {
        let mut options = HashMap::new();
        options.insert(key.to_string(), value.to_string());
        options
    }

    /// Builds an options map without any entries.
    fn no_options() -> HashMap<String, String> {
        HashMap::new()
    }

    /// Setting that yields a fixed value no matter what the options contain.
    #[derive(Debug)]
    struct TestUnconditionalSetting {
        name: &'static str,
        value: String,
    }

    impl TestUnconditionalSetting {
        fn new(name: &'static str, value: String) -> Self {
            Self { name, value }
        }
    }

    impl DuckDBSetting for TestUnconditionalSetting {
        fn as_any(&self) -> &dyn std::any::Any {
            self
        }

        fn setting_name(&self) -> &'static str {
            self.name
        }

        fn get_value(&self, _options: &HashMap<String, String>) -> Option<String> {
            // Unconditional: the options are deliberately ignored.
            Some(self.value.clone())
        }

        fn scope(&self) -> DuckDBSettingScope {
            DuckDBSettingScope::Global
        }

        fn format_sql_value(&self, value: &str) -> String {
            format!("'{}'", value)
        }
    }

    /// Setting with a custom `validate` that rejects the literal value `invalid`.
    #[derive(Debug)]
    struct TestValidatingSetting;

    impl DuckDBSetting for TestValidatingSetting {
        fn as_any(&self) -> &dyn std::any::Any {
            self
        }

        fn setting_name(&self) -> &'static str {
            "test_setting"
        }

        fn scope(&self) -> DuckDBSettingScope {
            DuckDBSettingScope::Global
        }

        fn get_value(&self, options: &HashMap<String, String>) -> Option<String> {
            options.get("test_setting").cloned()
        }

        fn validate(&self, value: &str) -> Result<(), Error> {
            if value == "invalid" {
                return Err(Error::DbConnectionError {
                    source: "Test validation error".into(),
                });
            }
            Ok(())
        }
    }

    #[test]
    fn test_memory_limit_setting() {
        let setting = MemoryLimitSetting;

        assert_eq!(setting.setting_name(), "memory_limit");

        // The value comes from the `memory_limit` option and is absent otherwise.
        assert_eq!(
            setting.get_value(&single_option("memory_limit", "1GB")),
            Some("1GB".to_string())
        );
        assert_eq!(setting.get_value(&no_options()), None);

        // Memory limits are rendered as quoted literals.
        assert_eq!(setting.format_sql_value("1GB"), "'1GB'");

        for accepted in &["1GB", "512MiB"] {
            assert!(
                setting.validate(accepted).is_ok(),
                "{accepted} should be accepted"
            );
        }
        assert!(setting.validate("invalid").is_err());
    }

    #[test]
    fn test_temp_directory_setting() {
        let setting = TempDirectorySetting;

        assert_eq!(setting.setting_name(), "temp_directory");

        // The value comes from the `temp_directory` option and is absent otherwise.
        assert_eq!(
            setting.get_value(&single_option("temp_directory", "/tmp/test")),
            Some("/tmp/test".to_string())
        );
        assert_eq!(setting.get_value(&no_options()), None);

        // Paths are rendered as quoted literals.
        assert_eq!(setting.format_sql_value("/tmp/test"), "'/tmp/test'");

        // No custom validation: everything passes.
        for accepted in &["/tmp/test", "any_value"] {
            assert!(
                setting.validate(accepted).is_ok(),
                "{accepted} should be accepted"
            );
        }
    }

    #[test]
    fn test_preserve_insertion_order_setting() {
        let setting = PreserveInsertionOrderSetting;

        assert_eq!(setting.setting_name(), "preserve_insertion_order");

        // The value comes from the `preserve_insertion_order` option and is absent otherwise.
        assert_eq!(
            setting.get_value(&single_option("preserve_insertion_order", "true")),
            Some("true".to_string())
        );
        assert_eq!(setting.get_value(&no_options()), None);

        // Trait defaults apply: values are emitted unquoted and always validate.
        for flag in &["true", "false"] {
            assert_eq!(setting.format_sql_value(flag), *flag);
            assert!(setting.validate(flag).is_ok(), "{flag} should be accepted");
        }
    }

    #[test]
    fn test_settings_registry_new() {
        let registry = DuckDBSettingsRegistry::new();

        // Exactly the three built-in settings are present.
        assert_eq!(registry.settings.len(), 3);

        let setting_names: Vec<&'static str> = registry
            .settings
            .iter()
            .map(|setting| setting.setting_name())
            .collect();

        for expected in &["memory_limit", "temp_directory", "preserve_insertion_order"] {
            assert!(
                setting_names.contains(expected),
                "{expected} should be registered by default"
            );
        }
    }

    #[test]
    fn test_settings_registry_empty() {
        let registry = DuckDBSettingsRegistry::empty();

        assert_eq!(registry.settings.len(), 0);
    }

    #[test]
    fn test_settings_registry_default() {
        let registry = DuckDBSettingsRegistry::default();

        // `Default` delegates to `new()`, so the built-ins are present.
        assert_eq!(registry.settings.len(), 3);
    }

    #[test]
    fn test_settings_registry_register() {
        let mut registry = DuckDBSettingsRegistry::empty();
        assert_eq!(registry.settings.len(), 0);

        // First registration.
        let custom = TestUnconditionalSetting::new("test_setting", "test_value".to_string());
        registry.register(Box::new(custom));
        assert_eq!(registry.settings.len(), 1);
        assert_eq!(registry.settings[0].setting_name(), "test_setting");

        // Second registration.
        registry.register(Box::new(MemoryLimitSetting));
        assert_eq!(registry.settings.len(), 2);
    }

    #[test]
    fn test_unconditional_setting() {
        let setting =
            TestUnconditionalSetting::new("test_unconditional", "always_this_value".to_string());
        let expected = Some("always_this_value".to_string());

        assert_eq!(setting.setting_name(), "test_unconditional");

        // The value is returned whether the options are empty or hold unrelated keys.
        assert_eq!(setting.get_value(&no_options()), expected);
        assert_eq!(
            setting.get_value(&single_option("some_other_key", "some_value")),
            expected
        );

        assert_eq!(setting.format_sql_value("test"), "'test'");
    }

    #[test]
    fn test_custom_validation() {
        let setting = TestValidatingSetting;

        assert_eq!(setting.setting_name(), "test_setting");

        assert_eq!(
            setting.get_value(&single_option("test_setting", "valid_value")),
            Some("valid_value".to_string())
        );

        assert!(setting.validate("valid_value").is_ok());
        assert!(setting.validate("invalid").is_err());
    }

    #[test]
    fn test_trait_default_implementations() {
        // `TestUnconditionalSetting` does not override `validate`.
        let without_validate = TestUnconditionalSetting::new("test", "value".to_string());
        assert!(without_validate.validate("any_value").is_ok());

        // `TestValidatingSetting` does not override `format_sql_value`.
        let without_format = TestValidatingSetting;
        assert_eq!(without_format.format_sql_value("test"), "test");
    }

    #[test]
    fn test_as_any_functionality() {
        let boxed_setting: Box<dyn DuckDBSetting> = Box::new(MemoryLimitSetting);
        let any_ref = boxed_setting.as_any();

        // The concrete type is recoverable through `as_any` ...
        assert!(any_ref.is::<MemoryLimitSetting>());
        assert!(any_ref.downcast_ref::<MemoryLimitSetting>().is_some());

        // ... and a downcast to an unrelated setting type fails.
        assert!(any_ref.downcast_ref::<TempDirectorySetting>().is_none());
    }

    #[test]
    fn test_memory_limit_validation_edge_cases() {
        let setting = MemoryLimitSetting;

        // The last entry is a plain number without a unit.
        let accepted = [
            "1KB", "1MB", "1GB", "1TB", "1KiB", "1MiB", "1GiB", "1TiB", "512.5MB", "123",
        ];
        for value in &accepted {
            assert!(setting.validate(value).is_ok(), "{value:?} should be accepted");
        }

        // Empty input, non-numeric input, unknown unit, malformed number.
        let rejected = ["", "not_a_number", "123XB", "abc123MB"];
        for value in &rejected {
            assert!(setting.validate(value).is_err(), "{value:?} should be rejected");
        }
    }

    #[test]
    fn test_settings_ordering_in_registry() {
        let mut registry = DuckDBSettingsRegistry::empty();

        let ordered = [("first", "1"), ("second", "2"), ("third", "3")];
        for (name, value) in ordered.iter().copied() {
            registry.register(Box::new(TestUnconditionalSetting::new(
                name,
                value.to_string(),
            )));
        }

        // Settings are stored in the order they were registered.
        assert_eq!(registry.settings[0].setting_name(), "first");
        assert_eq!(registry.settings[1].setting_name(), "second");
        assert_eq!(registry.settings[2].setting_name(), "third");
    }

    #[test]
    fn test_multiple_settings_with_same_option_key() {
        // Two distinct settings that both read the same option key
        // (unusual in practice, but it must work).

        #[derive(Debug)]
        struct TestSetting1;

        #[derive(Debug)]
        struct TestSetting2;

        impl DuckDBSetting for TestSetting1 {
            fn as_any(&self) -> &dyn std::any::Any {
                self
            }
            fn setting_name(&self) -> &'static str {
                "setting1"
            }
            fn scope(&self) -> DuckDBSettingScope {
                DuckDBSettingScope::Global
            }
            fn get_value(&self, options: &HashMap<String, String>) -> Option<String> {
                options.get("shared_key").cloned()
            }
        }

        impl DuckDBSetting for TestSetting2 {
            fn as_any(&self) -> &dyn std::any::Any {
                self
            }
            fn setting_name(&self) -> &'static str {
                "setting2"
            }
            fn scope(&self) -> DuckDBSettingScope {
                DuckDBSettingScope::Global
            }
            fn get_value(&self, options: &HashMap<String, String>) -> Option<String> {
                options.get("shared_key").cloned()
            }
        }

        let options = single_option("shared_key", "shared_value");
        let expected = Some("shared_value".to_string());

        // Both settings resolve to the same value.
        assert_eq!(TestSetting1.get_value(&options), expected);
        assert_eq!(TestSetting2.get_value(&options), expected);
    }
}