dsq_cli/
config.rs

1//! Configuration management for dsq
2//!
3//! This module provides configuration management for dsq, handling configuration
4//! from multiple sources including command-line arguments, environment variables,
5//! and configuration files. It provides a unified configuration structure that
6//! can be used throughout the application.
7
8use std::fs;
9use std::path::{Path, PathBuf};
10
11use serde::{Deserialize, Serialize};
12
13#[cfg(feature = "cli")]
14use crate::cli::CliConfig;
15use dsq_core::{
16    error::{Error, Result},
17    filter::{ErrorMode, ExecutorConfig},
18    io::{ReadOptions, WriteOptions},
19    DataFormat,
20};
21
22/// Main configuration structure for dsq runtime
23#[derive(Debug, Clone, Serialize, Deserialize)]
24#[serde(default)]
25#[derive(Default)]
26pub struct Config {
27    /// Input/output configuration
28    pub io: IoConfig,
29    /// Filter execution configuration
30    pub filter: FilterConfig,
31    /// Format-specific configurations
32    pub formats: FormatConfigs,
33    /// Display and output configuration
34    pub display: DisplayConfig,
35    /// Performance and resource configuration
36    pub performance: PerformanceConfig,
37    /// Module and library configuration
38    pub modules: ModuleConfig,
39    /// Debug and diagnostic configuration
40    pub debug: DebugConfig,
41    /// Variables for filter execution
42    pub variables: std::collections::HashMap<String, serde_json::Value>,
43}
44
45/// Input/output configuration
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct IoConfig {
48    /// Default input format when not detected
49    pub default_input_format: Option<DataFormat>,
50    /// Default output format when not specified
51    pub default_output_format: Option<DataFormat>,
52    /// Whether to auto-detect formats
53    pub auto_detect_format: bool,
54    /// Buffer size for I/O operations
55    pub buffer_size: usize,
56    /// Whether to overwrite existing files by default
57    pub overwrite_by_default: bool,
58    /// Maximum file size for in-memory processing
59    pub max_memory_file_size: usize,
60    /// Maximum number of rows to output
61    pub limit: Option<usize>,
62}
63
64/// Filter execution configuration
65#[derive(Debug, Clone, Serialize, Deserialize)]
66#[serde(default)]
67pub struct FilterConfig {
68    /// Whether to use lazy evaluation by default
69    pub lazy_evaluation: bool,
70    /// Whether to enable DataFrame optimizations
71    pub dataframe_optimizations: bool,
72    /// Filter optimization level
73    pub optimization_level: String,
74    /// Maximum recursion depth
75    pub max_recursion_depth: usize,
76    /// Maximum execution time in seconds
77    pub max_execution_time: Option<u64>,
78    /// Whether to collect execution statistics
79    pub collect_stats: bool,
80    /// Error handling mode
81    pub error_mode: String,
82}
83
84/// Format-specific configurations
85#[derive(Debug, Clone, Serialize, Deserialize)]
86#[serde(default)]
87#[derive(Default)]
88pub struct FormatConfigs {
89    /// CSV configuration
90    pub csv: CsvConfig,
91    /// JSON configuration
92    pub json: JsonConfig,
93    /// Parquet configuration
94    pub parquet: ParquetConfig,
95}
96
97/// CSV format configuration
98#[derive(Debug, Clone, Serialize, Deserialize)]
99#[serde(default)]
100pub struct CsvConfig {
101    /// Default field separator
102    pub separator: String,
103    /// Whether files have headers by default
104    pub has_header: bool,
105    /// Quote character
106    pub quote_char: String,
107    /// Comment character
108    pub comment_char: Option<String>,
109    /// Values to treat as null
110    pub null_values: Vec<String>,
111    /// Whether to trim whitespace
112    pub trim_whitespace: bool,
113    /// Number of rows for schema inference
114    pub infer_schema_length: usize,
115}
116
117/// JSON format configuration
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct JsonConfig {
120    /// Whether to pretty-print by default
121    pub pretty_print: bool,
122    /// Whether to maintain field order
123    pub maintain_order: bool,
124    /// Whether to escape Unicode characters
125    pub escape_unicode: bool,
126    /// Whether to flatten nested objects by default
127    pub flatten: bool,
128    /// Separator for flattened field names
129    pub flatten_separator: String,
130}
131
132/// Parquet format configuration
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct ParquetConfig {
135    /// Default compression algorithm
136    pub compression: String,
137    /// Whether to write statistics
138    pub write_statistics: bool,
139    /// Row group size
140    pub row_group_size: usize,
141    /// Data page size limit
142    pub data_page_size: usize,
143}
144
145/// Display and output configuration
146#[derive(Debug, Clone, Serialize, Deserialize, Default)]
147pub struct DisplayConfig {
148    /// Whether to use colored output
149    pub color: ColorConfig,
150    /// Whether to use compact output by default
151    pub compact: bool,
152    /// Whether to sort object keys
153    pub sort_keys: bool,
154    /// Whether to use raw output for strings
155    pub raw_output: bool,
156    /// Whether to set exit status based on filter result
157    pub exit_status: bool,
158    /// Number format configuration
159    pub number_format: NumberFormatConfig,
160    /// Date/time format configuration
161    pub datetime_format: DateTimeFormatConfig,
162}
163
164/// Color configuration
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct ColorConfig {
167    /// Whether colors are enabled
168    pub enabled: Option<bool>,
169    /// Color scheme name
170    pub scheme: String,
171    /// Whether to detect terminal capabilities
172    pub auto_detect: bool,
173}
174
175/// Number formatting configuration
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct NumberFormatConfig {
178    /// Decimal precision for floats
179    pub float_precision: Option<usize>,
180    /// Whether to use scientific notation
181    pub scientific_notation: bool,
182    /// Threshold for scientific notation
183    pub scientific_threshold: f64,
184}
185
186/// Date/time formatting configuration
187#[derive(Debug, Clone, Serialize, Deserialize)]
188pub struct DateTimeFormatConfig {
189    /// Default date format
190    pub date_format: String,
191    /// Default datetime format
192    pub datetime_format: String,
193    /// Default time format
194    pub time_format: String,
195    /// Timezone handling
196    pub timezone: String,
197}
198
199/// Performance and resource configuration
200#[derive(Debug, Clone, Serialize, Deserialize)]
201#[serde(default)]
202pub struct PerformanceConfig {
203    /// Default batch size for processing
204    pub batch_size: usize,
205    /// Memory limit in bytes
206    pub memory_limit: Option<usize>,
207    /// Number of threads to use (0 = auto)
208    pub threads: usize,
209    /// Whether to enable parallel execution
210    pub parallel: bool,
211    /// Cache size for repeated operations
212    pub cache_size: usize,
213}
214
215/// Module and library configuration
216#[derive(Debug, Clone, Serialize, Deserialize, Default)]
217pub struct ModuleConfig {
218    /// Library search paths
219    pub library_paths: Vec<PathBuf>,
220    /// Auto-load modules
221    pub auto_load: Vec<String>,
222    /// Module cache directory
223    pub cache_dir: Option<PathBuf>,
224}
225
226/// Debug and diagnostic configuration
227#[derive(Debug, Clone, Serialize, Deserialize)]
228#[serde(default)]
229#[derive(Default)]
230pub struct DebugConfig {
231    /// Verbosity level
232    pub verbosity: u8,
233    /// Whether to show execution plans
234    pub show_plans: bool,
235    /// Whether to show timing information
236    pub show_timing: bool,
237    /// Whether to enable debug mode
238    pub debug_mode: bool,
239    /// Log file path
240    pub log_file: Option<PathBuf>,
241}
242
243impl Default for IoConfig {
244    fn default() -> Self {
245        Self {
246            default_input_format: None,
247            default_output_format: None,
248            auto_detect_format: true,
249            buffer_size: 8192,
250            overwrite_by_default: false,
251            max_memory_file_size: 100 * 1024 * 1024, // 100MB
252            limit: None,
253        }
254    }
255}
256
257impl Default for FilterConfig {
258    fn default() -> Self {
259        Self {
260            lazy_evaluation: true,
261            dataframe_optimizations: true,
262            optimization_level: "basic".to_string(),
263            max_recursion_depth: 1000,
264            max_execution_time: Some(300), // 5 minutes
265            collect_stats: false,
266            error_mode: "strict".to_string(),
267        }
268    }
269}
270
271impl Default for CsvConfig {
272    fn default() -> Self {
273        Self {
274            separator: ",".to_string(),
275            has_header: true,
276            quote_char: "\"".to_string(),
277            comment_char: None,
278            null_values: vec!["".to_string(), "NA".to_string(), "NULL".to_string()],
279            trim_whitespace: false,
280            infer_schema_length: 1000,
281        }
282    }
283}
284
285impl Default for JsonConfig {
286    fn default() -> Self {
287        Self {
288            pretty_print: true,
289            maintain_order: false,
290            escape_unicode: false,
291            flatten: false,
292            flatten_separator: ".".to_string(),
293        }
294    }
295}
296
297impl Default for ParquetConfig {
298    fn default() -> Self {
299        Self {
300            compression: "snappy".to_string(),
301            write_statistics: true,
302            row_group_size: 1024 * 1024,
303            data_page_size: 1024 * 1024,
304        }
305    }
306}
307
308impl Default for ColorConfig {
309    fn default() -> Self {
310        Self {
311            enabled: None,
312            scheme: "default".to_string(),
313            auto_detect: true,
314        }
315    }
316}
317
318impl Default for NumberFormatConfig {
319    fn default() -> Self {
320        Self {
321            float_precision: None,
322            scientific_notation: false,
323            scientific_threshold: 1e9,
324        }
325    }
326}
327
328impl Default for DateTimeFormatConfig {
329    fn default() -> Self {
330        Self {
331            date_format: "%Y-%m-%d".to_string(),
332            datetime_format: "%Y-%m-%d %H:%M:%S".to_string(),
333            time_format: "%H:%M:%S".to_string(),
334            timezone: "local".to_string(),
335        }
336    }
337}
338
339impl Default for PerformanceConfig {
340    fn default() -> Self {
341        Self {
342            batch_size: 10000,
343            memory_limit: None,
344            threads: 0, // Auto-detect
345            parallel: true,
346            cache_size: 100,
347        }
348    }
349}
350
351impl Config {
352    /// Create a new configuration with defaults
353    #[allow(dead_code)]
354    pub fn new() -> Self {
355        Self::default()
356    }
357
358    /// Load configuration from a specific file
359    pub fn load_from_file(path: &Path) -> Result<Self> {
360        let mut config = Self::default();
361        config.merge_file(path)?;
362        Ok(config)
363    }
364
365    /// Load configuration from multiple sources
366    pub fn load() -> Result<Self> {
367        Self::load_with_env_reader(|key| std::env::var(key).ok())
368    }
369
370    /// Load configuration with a custom environment variable reader
371    /// This allows for testing without environment variable leakage
372    fn load_with_env_reader<F>(env_reader: F) -> Result<Self>
373    where
374        F: Fn(&str) -> Option<String>,
375    {
376        let mut config = Self::default();
377
378        // 1. Load from config file if it exists
379        let home_dir = env_reader("HOME");
380        if let Some(config_path) = Self::find_config_file_with_home(None, home_dir.as_deref()) {
381            config.merge_file(&config_path)?;
382        }
383
384        // 2. Apply environment variables
385        config.merge_env_with_reader(env_reader)?;
386
387        Ok(config)
388    }
389
390    /// Find configuration file in standard locations
391    #[allow(dead_code)]
392    pub(crate) fn find_config_file(current_dir: Option<&Path>) -> Option<PathBuf> {
393        let home = std::env::var("HOME").ok();
394        Self::find_config_file_with_home(current_dir, home.as_deref())
395    }
396
/// Find configuration file with injectable home directory
///
/// The file names `dsq.toml`, `.dsq.toml`, `dsq.yaml` and `.dsq.yaml` are
/// tried, in that order, in each of these places; the first hit wins:
///
/// 1. `current_dir`, or the process working directory when it is `None`
///    (falling back to `"."` if that cannot be determined);
/// 2. when `home_dir` is given, for each name in turn
///    `<home>/.config/dsq/<name>` and then `<home>/<name>`;
/// 3. `/etc/dsq`.
///
/// Only regular files are accepted, so a directory that happens to be named
/// like a config file is skipped instead of being returned and then failing
/// to load. The returned path is canonicalized when possible; otherwise the
/// path is returned as it was probed.
fn find_config_file_with_home(
    current_dir: Option<&Path>,
    home_dir: Option<&str>,
) -> Option<PathBuf> {
    const CONFIG_NAMES: &[&str] = &["dsq.toml", ".dsq.toml", "dsq.yaml", ".dsq.yaml"];

    let search_root = match current_dir {
        Some(dir) => dir.to_path_buf(),
        None => std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
    };

    // Build the candidate list in priority order; joining paths does no I/O.
    let mut candidates: Vec<PathBuf> = Vec::with_capacity(CONFIG_NAMES.len() * 4);

    candidates.extend(CONFIG_NAMES.iter().map(|name| search_root.join(name)));

    if let Some(home) = home_dir {
        let home = Path::new(home);
        for name in CONFIG_NAMES {
            candidates.push(home.join(".config").join("dsq").join(name));
            candidates.push(home.join(name));
        }
    }

    candidates.extend(
        CONFIG_NAMES
            .iter()
            .map(|name| Path::new("/etc/dsq").join(name)),
    );

    // Probe the filesystem lazily and stop at the first regular file.
    candidates
        .into_iter()
        .find(|candidate| candidate.is_file())
        .map(|found| found.canonicalize().unwrap_or(found))
}
443
444    /// Merge configuration from file
445    pub fn merge_file(&mut self, path: &Path) -> Result<()> {
446        let content = fs::read_to_string(path)
447            .map_err(|e| Error::config(format!("Failed to read config file: {}", e)))?;
448
449        let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
450
451        match extension {
452            "toml" => {
453                let file_config: Config = toml::from_str(&content)
454                    .map_err(|e| Error::config(format!("Invalid TOML config: {}", e)))?;
455                self.merge(file_config);
456            }
457            "yaml" | "yml" => {
458                let file_config: Config = serde_yaml::from_str(&content)
459                    .map_err(|e| Error::config(format!("Invalid YAML config: {}", e)))?;
460                self.merge(file_config);
461            }
462            _ => return Err(Error::config("Unsupported config file format")),
463        }
464
465        Ok(())
466    }
467
468    /// Merge configuration from environment variables
469    #[allow(dead_code)]
470    fn merge_env(&mut self) -> Result<()> {
471        self.merge_env_with_reader(|key| std::env::var(key).ok())
472    }
473
474    /// Merge configuration from environment variables with custom reader
475    fn merge_env_with_reader<F>(&mut self, env_reader: F) -> Result<()>
476    where
477        F: Fn(&str) -> Option<String>,
478    {
479        // DSQ_LAZY
480        if let Some(val) = env_reader("DSQ_LAZY") {
481            self.filter.lazy_evaluation = val != "0" && val.to_lowercase() != "false";
482        }
483
484        // DSQ_COLORS
485        if let Some(val) = env_reader("DSQ_COLORS") {
486            self.display.color.enabled = Some(val != "0" && val.to_lowercase() != "false");
487        }
488
489        // DSQ_LIBRARY_PATH
490        if let Some(val) = env_reader("DSQ_LIBRARY_PATH") {
491            self.modules.library_paths = std::env::split_paths(&val).collect();
492        }
493
494        // DSQ_BATCH_SIZE
495        if let Some(val) = env_reader("DSQ_BATCH_SIZE") {
496            if let Ok(size) = val.parse() {
497                self.performance.batch_size = size;
498            } else {
499                self.performance.batch_size = PerformanceConfig::default().batch_size;
500            }
501        }
502
503        // DSQ_MEMORY_LIMIT
504        if let Some(val) = env_reader("DSQ_MEMORY_LIMIT") {
505            if let Ok(limit) = parse_memory_limit(&val) {
506                self.performance.memory_limit = Some(limit);
507            } else {
508                self.performance.memory_limit = PerformanceConfig::default().memory_limit;
509            }
510        }
511
512        // DSQ_THREADS
513        if let Some(val) = env_reader("DSQ_THREADS") {
514            if let Ok(threads) = val.parse() {
515                self.performance.threads = threads;
516            } else {
517                self.performance.threads = PerformanceConfig::default().threads;
518            }
519        }
520
521        // DSQ_DEBUG
522        if let Some(val) = env_reader("DSQ_DEBUG") {
523            self.debug.debug_mode = val != "0" && val.to_lowercase() != "false";
524        }
525
526        // DSQ_VERBOSITY
527        if let Some(val) = env_reader("DSQ_VERBOSITY") {
528            if let Ok(level) = val.parse() {
529                self.debug.verbosity = level;
530            } else {
531                self.debug.verbosity = DebugConfig::default().verbosity;
532            }
533        }
534
535        Ok(())
536    }
537
538    /// Merge another config into this one
539    fn merge(&mut self, other: Config) {
540        // Merge I/O config
541        if other.io.default_input_format.is_some() {
542            self.io.default_input_format = other.io.default_input_format;
543        }
544        if other.io.default_output_format.is_some() {
545            self.io.default_output_format = other.io.default_output_format;
546        }
547        if !other.io.auto_detect_format {
548            self.io.auto_detect_format = other.io.auto_detect_format;
549        }
550        if other.io.buffer_size != IoConfig::default().buffer_size {
551            self.io.buffer_size = other.io.buffer_size;
552        }
553        if other.io.overwrite_by_default {
554            self.io.overwrite_by_default = other.io.overwrite_by_default;
555        }
556        if other.io.max_memory_file_size != IoConfig::default().max_memory_file_size {
557            self.io.max_memory_file_size = other.io.max_memory_file_size;
558        }
559
560        // Merge filter config
561        if !other.filter.lazy_evaluation {
562            self.filter.lazy_evaluation = other.filter.lazy_evaluation;
563        }
564        if !other.filter.dataframe_optimizations {
565            self.filter.dataframe_optimizations = other.filter.dataframe_optimizations;
566        }
567        if other.filter.optimization_level != FilterConfig::default().optimization_level {
568            self.filter.optimization_level = other.filter.optimization_level;
569        }
570        if other.filter.max_recursion_depth != FilterConfig::default().max_recursion_depth {
571            self.filter.max_recursion_depth = other.filter.max_recursion_depth;
572        }
573        if other.filter.max_execution_time.is_some() {
574            self.filter.max_execution_time = other.filter.max_execution_time;
575        }
576        if other.filter.collect_stats {
577            self.filter.collect_stats = other.filter.collect_stats;
578        }
579        if other.filter.error_mode != FilterConfig::default().error_mode {
580            self.filter.error_mode = other.filter.error_mode;
581        }
582
583        // Merge format configs
584        self.merge_csv_config(other.formats.csv);
585        self.merge_json_config(other.formats.json);
586        self.merge_parquet_config(other.formats.parquet);
587
588        // Merge display config
589        self.merge_display_config(other.display);
590
591        // Merge performance config
592        if other.performance.batch_size != PerformanceConfig::default().batch_size {
593            self.performance.batch_size = other.performance.batch_size;
594        }
595        if other.performance.memory_limit.is_some() {
596            self.performance.memory_limit = other.performance.memory_limit;
597        }
598        if other.performance.threads != PerformanceConfig::default().threads {
599            self.performance.threads = other.performance.threads;
600        }
601        if !other.performance.parallel {
602            self.performance.parallel = other.performance.parallel;
603        }
604        if other.performance.cache_size != PerformanceConfig::default().cache_size {
605            self.performance.cache_size = other.performance.cache_size;
606        }
607
608        // Merge module config
609        if !other.modules.library_paths.is_empty() {
610            self.modules
611                .library_paths
612                .extend(other.modules.library_paths);
613        }
614        if !other.modules.auto_load.is_empty() {
615            self.modules.auto_load.extend(other.modules.auto_load);
616        }
617        if other.modules.cache_dir.is_some() {
618            self.modules.cache_dir = other.modules.cache_dir;
619        }
620
621        // Merge debug config
622        if other.debug.verbosity != DebugConfig::default().verbosity {
623            self.debug.verbosity = other.debug.verbosity;
624        }
625        if other.debug.show_plans {
626            self.debug.show_plans = other.debug.show_plans;
627        }
628        if other.debug.show_timing {
629            self.debug.show_timing = other.debug.show_timing;
630        }
631        if other.debug.debug_mode {
632            self.debug.debug_mode = other.debug.debug_mode;
633        }
634        if other.debug.log_file.is_some() {
635            self.debug.log_file = other.debug.log_file;
636        }
637
638        // Merge variables (extend, don't replace)
639        for (key, value) in other.variables {
640            self.variables.insert(key, value);
641        }
642    }
643
644    /// Merge CSV format config
645    fn merge_csv_config(&mut self, other: CsvConfig) {
646        if other.separator != CsvConfig::default().separator {
647            self.formats.csv.separator = other.separator;
648        }
649        if !other.has_header {
650            self.formats.csv.has_header = other.has_header;
651        }
652        if other.quote_char != CsvConfig::default().quote_char {
653            self.formats.csv.quote_char = other.quote_char;
654        }
655        if other.comment_char.is_some() {
656            self.formats.csv.comment_char = other.comment_char;
657        }
658        if !other.null_values.is_empty() {
659            self.formats.csv.null_values = other.null_values;
660        }
661        if other.trim_whitespace {
662            self.formats.csv.trim_whitespace = other.trim_whitespace;
663        }
664        if other.infer_schema_length != CsvConfig::default().infer_schema_length {
665            self.formats.csv.infer_schema_length = other.infer_schema_length;
666        }
667    }
668
669    /// Merge JSON format config
670    fn merge_json_config(&mut self, other: JsonConfig) {
671        if !other.pretty_print {
672            self.formats.json.pretty_print = other.pretty_print;
673        }
674        if other.maintain_order {
675            self.formats.json.maintain_order = other.maintain_order;
676        }
677        if other.escape_unicode {
678            self.formats.json.escape_unicode = other.escape_unicode;
679        }
680        if other.flatten {
681            self.formats.json.flatten = other.flatten;
682        }
683        if other.flatten_separator != JsonConfig::default().flatten_separator {
684            self.formats.json.flatten_separator = other.flatten_separator;
685        }
686    }
687
688    /// Merge Parquet format config
689    fn merge_parquet_config(&mut self, other: ParquetConfig) {
690        if other.compression != ParquetConfig::default().compression {
691            self.formats.parquet.compression = other.compression;
692        }
693        if !other.write_statistics {
694            self.formats.parquet.write_statistics = other.write_statistics;
695        }
696        if other.row_group_size != ParquetConfig::default().row_group_size {
697            self.formats.parquet.row_group_size = other.row_group_size;
698        }
699        if other.data_page_size != ParquetConfig::default().data_page_size {
700            self.formats.parquet.data_page_size = other.data_page_size;
701        }
702    }
703
704    /// Merge display config
705    fn merge_display_config(&mut self, other: DisplayConfig) {
706        // Merge color config
707        if other.color.enabled.is_some() {
708            self.display.color.enabled = other.color.enabled;
709        }
710        if other.color.scheme != ColorConfig::default().scheme {
711            self.display.color.scheme = other.color.scheme;
712        }
713        if !other.color.auto_detect {
714            self.display.color.auto_detect = other.color.auto_detect;
715        }
716
717        if other.compact {
718            self.display.compact = other.compact;
719        }
720        if other.sort_keys {
721            self.display.sort_keys = other.sort_keys;
722        }
723        if other.raw_output {
724            self.display.raw_output = other.raw_output;
725        }
726        if other.exit_status {
727            self.display.exit_status = other.exit_status;
728        }
729
730        // Merge number format
731        if other.number_format.float_precision.is_some() {
732            self.display.number_format.float_precision = other.number_format.float_precision;
733        }
734        if other.number_format.scientific_notation {
735            self.display.number_format.scientific_notation =
736                other.number_format.scientific_notation;
737        }
738        if (other.number_format.scientific_threshold
739            - NumberFormatConfig::default().scientific_threshold)
740            .abs()
741            > f64::EPSILON
742        {
743            self.display.number_format.scientific_threshold =
744                other.number_format.scientific_threshold;
745        }
746
747        // Merge datetime format
748        if other.datetime_format.date_format != DateTimeFormatConfig::default().date_format {
749            self.display.datetime_format.date_format = other.datetime_format.date_format;
750        }
751        if other.datetime_format.datetime_format != DateTimeFormatConfig::default().datetime_format
752        {
753            self.display.datetime_format.datetime_format = other.datetime_format.datetime_format;
754        }
755        if other.datetime_format.time_format != DateTimeFormatConfig::default().time_format {
756            self.display.datetime_format.time_format = other.datetime_format.time_format;
757        }
758        if other.datetime_format.timezone != DateTimeFormatConfig::default().timezone {
759            self.display.datetime_format.timezone = other.datetime_format.timezone;
760        }
761    }
762
/// Apply CLI configuration overrides
///
/// Optional CLI values (formats, row limit, color, CSV separator/headers,
/// batch size, memory limit) override the current setting only when they
/// were given. Variables from the CLI are merged into the existing map: a
/// CLI variable replaces one with the same name, while variables that came
/// from other sources are kept.
///
/// Plain (non-optional) CLI values are always copied over whatever was
/// configured before: the lazy and DataFrame-optimization switches, the
/// compact/raw/sort-keys/exit-status display flags, the verbosity and the
/// explain flag. A non-empty CLI library path list replaces the configured
/// one.
///
/// # Errors
///
/// Fails when the CLI memory limit cannot be parsed by
/// `parse_memory_limit`.
#[cfg(feature = "cli")]
pub fn apply_cli(&mut self, cli_config: &CliConfig) -> Result<()> {
    // I/O settings
    if let Some(format) = &cli_config.input_format {
        self.io.default_input_format = Some(*format);
    }
    if let Some(format) = &cli_config.output_format {
        self.io.default_output_format = Some(*format);
    }
    // Keep a limit configured elsewhere when the CLI did not specify one.
    if let Some(limit) = cli_config.limit {
        self.io.limit = Some(limit);
    }

    // Filter settings
    self.filter.lazy_evaluation = cli_config.lazy;
    self.filter.dataframe_optimizations = cli_config.dataframe_optimizations;

    // Display settings
    self.display.compact = cli_config.compact_output;
    self.display.raw_output = cli_config.raw_output;
    self.display.sort_keys = cli_config.sort_keys;
    self.display.exit_status = cli_config.exit_status;
    if let Some(color) = cli_config.color_output {
        self.display.color.enabled = Some(color);
    }

    // CSV settings
    if let Some(sep) = &cli_config.csv_separator {
        self.formats.csv.separator = sep.clone();
    }
    if let Some(has_header) = cli_config.csv_headers {
        self.formats.csv.has_header = has_header;
    }

    // Performance settings
    if let Some(batch_size) = cli_config.batch_size {
        self.performance.batch_size = batch_size;
    }
    if let Some(limit) = &cli_config.memory_limit {
        self.performance.memory_limit = Some(parse_memory_limit(limit)?);
    }

    // Module settings
    if !cli_config.library_path.is_empty() {
        self.modules.library_paths = cli_config.library_path.clone();
    }

    // Debug settings
    self.debug.verbosity = cli_config.verbose;
    self.debug.show_plans = cli_config.explain;

    // Variables: CLI entries win on name clashes, everything else is kept.
    self.variables.extend(cli_config.variables.clone());

    Ok(())
}
818
819    /// Convert to ReadOptions for dsq-core
820    pub fn to_read_options(&self) -> ReadOptions {
821        ReadOptions {
822            infer_schema: true,
823            n_rows: None,
824            skip_rows: 0,
825            chunk_size: None,
826            use_mmap: false,
827        }
828    }
829
830    /// Convert to WriteOptions for dsq-core
831    pub fn to_write_options(&self) -> WriteOptions {
832        WriteOptions {
833            include_header: true,
834            compression: None,
835        }
836    }
837
838    /// Convert to ExecutorConfig for dsq-filter
839    pub fn to_executor_config(&self) -> ExecutorConfig {
840        let variables = self
841            .variables
842            .iter()
843            .map(|(k, v)| (k.clone(), dsq_shared::value::Value::from_json(v.clone())))
844            .collect();
845
846        let error_mode = match self.filter.error_mode.as_str() {
847            "strict" => ErrorMode::Strict,
848            "collect" => ErrorMode::Collect,
849            "ignore" => ErrorMode::Ignore,
850            _ => ErrorMode::Strict,
851        };
852
853        ExecutorConfig {
854            timeout_ms: self.filter.max_execution_time.map(|s| s * 1000), // convert seconds to ms
855            error_mode,
856            collect_stats: self.filter.collect_stats,
857            max_recursion_depth: self.filter.max_recursion_depth,
858            debug_mode: self.debug.debug_mode,
859            batch_size: self.performance.batch_size,
860            variables,
861            filter_cache_size: 100,
862        }
863    }
864
865    /// Get format-specific read options
866    #[allow(dead_code)]
867    pub fn get_format_read_options(&self, format: DataFormat) -> ReadOptions {
868        let mut options = ReadOptions {
869            infer_schema: true,
870            n_rows: None,
871            skip_rows: 0,
872            chunk_size: None,
873            use_mmap: false,
874        };
875
876        match format {
877            DataFormat::Csv => {
878                options.infer_schema = true;
879                // Could add CSV-specific options like encoding, etc.
880            }
881            DataFormat::Json | DataFormat::JsonLines => {
882                options.infer_schema = true;
883                // Could add JSON-specific options
884            }
885            DataFormat::Parquet => {
886                // Parquet typically doesn't need schema inference
887                options.infer_schema = false;
888            }
889            _ => {
890                // Use defaults for other formats
891            }
892        }
893
894        options
895    }
896
897    /// Get format-specific write options
898    #[allow(dead_code)]
899    pub fn get_format_write_options(&self, format: DataFormat) -> WriteOptions {
900        let mut options = WriteOptions {
901            include_header: true,
902            compression: None,
903        };
904
905        match format {
906            DataFormat::Csv => {
907                options.include_header = self.formats.csv.has_header;
908                // Could add CSV-specific options like separator, quote_char, etc.
909            }
910            DataFormat::Json | DataFormat::JsonLines => {
911                // JSON-specific write options could include pretty_print, etc.
912                options.include_header = false; // JSON doesn't have headers
913            }
914            DataFormat::Parquet => {
915                options.compression = Some(self.formats.parquet.compression.clone());
916                options.include_header = false; // Parquet doesn't have headers in the same way
917            }
918            _ => {
919                // Use defaults for other formats
920            }
921        }
922
923        options
924    }
925
926    /// Check if color output should be enabled
927    #[allow(dead_code)]
928    pub fn should_use_color(&self) -> bool {
929        match self.display.color.enabled {
930            Some(enabled) => enabled,
931            None if self.display.color.auto_detect => {
932                // Auto-detect based on terminal capabilities
933                atty::is(atty::Stream::Stdout)
934                    && std::env::var("TERM").map(|t| t != "dumb").unwrap_or(true)
935            }
936            None => false,
937        }
938    }
939
940    /// Get the number of threads to use
941    #[allow(dead_code)]
942    pub fn get_thread_count(&self) -> usize {
943        if self.performance.threads == 0 {
944            num_cpus::get()
945        } else {
946            self.performance.threads
947        }
948    }
949
950    /// Get variables as dsq_core::Value map for filter execution
951    #[allow(dead_code)]
952    pub fn get_variables_as_value(&self) -> std::collections::HashMap<String, dsq_core::Value> {
953        self.variables
954            .iter()
955            .map(|(k, v)| {
956                let value = dsq_core::Value::from_json(v.clone());
957                (k.clone(), value)
958            })
959            .collect()
960    }
961
962    /// Save configuration to file
963    pub fn save(&self, path: &Path) -> Result<()> {
964        let extension = path
965            .extension()
966            .and_then(|ext| ext.to_str())
967            .unwrap_or("toml");
968
969        let content = match extension {
970            "toml" => toml::to_string_pretty(self)
971                .map_err(|e| Error::config(format!("Failed to serialize config: {}", e)))?,
972            "yaml" | "yml" => serde_yaml::to_string(self)
973                .map_err(|e| Error::config(format!("Failed to serialize config: {}", e)))?,
974            _ => return Err(Error::config("Unsupported config file format")),
975        };
976
977        fs::write(path, content)
978            .map_err(|e| Error::config(format!("Failed to write config file: {}", e)))?;
979
980        Ok(())
981    }
982}
983
984/// Parse memory limit string into bytes
985fn parse_memory_limit(limit: &str) -> Result<usize> {
986    let limit = limit.to_uppercase();
987
988    if let Some(num_str) = limit.strip_suffix("GB") {
989        let num: usize = num_str
990            .parse()
991            .map_err(|_| Error::config(format!("Invalid memory limit: {}", limit)))?;
992        Ok(num * 1024 * 1024 * 1024)
993    } else if let Some(num_str) = limit.strip_suffix("MB") {
994        let num: usize = num_str
995            .parse()
996            .map_err(|_| Error::config(format!("Invalid memory limit: {}", limit)))?;
997        Ok(num * 1024 * 1024)
998    } else if let Some(num_str) = limit.strip_suffix("KB") {
999        let num: usize = num_str
1000            .parse()
1001            .map_err(|_| Error::config(format!("Invalid memory limit: {}", limit)))?;
1002        Ok(num * 1024)
1003    } else if let Some(num_str) = limit.strip_suffix("B") {
1004        num_str
1005            .parse()
1006            .map_err(|_| Error::config(format!("Invalid memory limit: {}", limit)))
1007    } else {
1008        // Try parsing as plain number (bytes)
1009        limit.parse().map_err(|_| {
1010            Error::config(format!(
1011                "Invalid memory limit: {} (use format like '1GB', '500MB')",
1012                limit
1013            ))
1014        })
1015    }
1016}
1017
1018/// Create a default config file template
1019#[allow(dead_code)]
1020pub fn create_default_config_file(path: &Path) -> Result<()> {
1021    let config = Config::default();
1022    config.save(path)?;
1023    Ok(())
1024}
1025
1026/// Validate configuration
1027#[allow(dead_code)]
1028pub fn validate_config(config: &Config) -> Result<()> {
1029    // Validate performance settings
1030    if config.performance.batch_size == 0 {
1031        return Err(Error::config("Batch size must be greater than 0"));
1032    }
1033
1034    if config.performance.threads > 1024 {
1035        return Err(Error::config("Thread count seems unreasonably high"));
1036    }
1037
1038    // Validate filter settings
1039    if config.filter.max_recursion_depth == 0 {
1040        return Err(Error::config("Max recursion depth must be greater than 0"));
1041    }
1042
1043    // Validate format settings
1044    if config.formats.csv.separator.len() != 1 {
1045        return Err(Error::config("CSV separator must be a single character"));
1046    }
1047
1048    if config.formats.csv.quote_char.len() != 1 {
1049        return Err(Error::config(
1050            "CSV quote character must be a single character",
1051        ));
1052    }
1053
1054    // Validate paths
1055    for path in &config.modules.library_paths {
1056        if !path.exists() {
1057            eprintln!("Warning: Library path does not exist: {}", path.display());
1058        }
1059    }
1060
1061    Ok(())
1062}
1063
1064#[cfg(test)]
1065mod tests {
1066    use std::fs;
1067
1068    use tempfile::TempDir;
1069
1070    use super::*;
1071
1072    #[test]
1073    fn test_default_config() {
1074        let config = Config::default();
1075        assert!(config.filter.lazy_evaluation);
1076        assert!(config.filter.dataframe_optimizations);
1077        assert_eq!(config.performance.batch_size, 10000);
1078        assert_eq!(config.formats.csv.separator, ",");
1079    }
1080
1081    #[test]
1082    fn test_all_default_implementations() {
1083        // Test Config default
1084        let config = Config::default();
1085        assert!(config.filter.lazy_evaluation);
1086        assert_eq!(config.filter.optimization_level, "basic");
1087        assert_eq!(config.performance.batch_size, 10000);
1088        assert_eq!(config.debug.verbosity, 0);
1089        assert!(config.variables.is_empty());
1090
1091        // Test IoConfig default
1092        let io = IoConfig::default();
1093        assert!(io.auto_detect_format);
1094        assert_eq!(io.buffer_size, 8192);
1095        assert!(!io.overwrite_by_default);
1096        assert_eq!(io.max_memory_file_size, 100 * 1024 * 1024);
1097
1098        // Test FilterConfig default
1099        let filter = FilterConfig::default();
1100        assert!(filter.lazy_evaluation);
1101        assert!(filter.dataframe_optimizations);
1102        assert_eq!(filter.optimization_level, "basic");
1103        assert_eq!(filter.max_recursion_depth, 1000);
1104        assert_eq!(filter.max_execution_time, Some(300));
1105        assert!(!filter.collect_stats);
1106        assert_eq!(filter.error_mode, "strict");
1107
1108        // Test FormatConfigs default
1109        let formats = FormatConfigs::default();
1110        assert_eq!(formats.csv.separator, ",");
1111        assert!(formats.csv.has_header);
1112        assert!(formats.json.pretty_print);
1113        assert_eq!(formats.parquet.compression, "snappy");
1114
1115        // Test CsvConfig default
1116        let csv = CsvConfig::default();
1117        assert_eq!(csv.separator, ",");
1118        assert!(csv.has_header);
1119        assert_eq!(csv.quote_char, "\"");
1120        assert_eq!(
1121            csv.null_values,
1122            vec!["".to_string(), "NA".to_string(), "NULL".to_string()]
1123        );
1124        assert!(!csv.trim_whitespace);
1125        assert_eq!(csv.infer_schema_length, 1000);
1126
1127        // Test JsonConfig default
1128        let json = JsonConfig::default();
1129        assert!(json.pretty_print);
1130        assert!(!json.maintain_order);
1131        assert!(!json.escape_unicode);
1132        assert!(!json.flatten);
1133        assert_eq!(json.flatten_separator, ".");
1134
1135        // Test ParquetConfig default
1136        let parquet = ParquetConfig::default();
1137        assert_eq!(parquet.compression, "snappy");
1138        assert!(parquet.write_statistics);
1139        assert_eq!(parquet.row_group_size, 1024 * 1024);
1140        assert_eq!(parquet.data_page_size, 1024 * 1024);
1141
1142        // Test DisplayConfig default
1143        let display = DisplayConfig::default();
1144        assert_eq!(display.color.scheme, "default");
1145        assert!(display.color.auto_detect);
1146        assert!(!display.compact);
1147        assert!(!display.sort_keys);
1148        assert!(!display.raw_output);
1149
1150        // Test ColorConfig default
1151        let color = ColorConfig::default();
1152        assert!(color.enabled.is_none());
1153        assert_eq!(color.scheme, "default");
1154        assert!(color.auto_detect);
1155
1156        // Test NumberFormatConfig default
1157        let number = NumberFormatConfig::default();
1158        assert!(number.float_precision.is_none());
1159        assert!(!number.scientific_notation);
1160        assert_eq!(number.scientific_threshold, 1e9);
1161
1162        // Test DateTimeFormatConfig default
1163        let datetime = DateTimeFormatConfig::default();
1164        assert_eq!(datetime.date_format, "%Y-%m-%d");
1165        assert_eq!(datetime.datetime_format, "%Y-%m-%d %H:%M:%S");
1166        assert_eq!(datetime.time_format, "%H:%M:%S");
1167        assert_eq!(datetime.timezone, "local");
1168
1169        // Test PerformanceConfig default
1170        let perf = PerformanceConfig::default();
1171        assert_eq!(perf.batch_size, 10000);
1172        assert!(perf.memory_limit.is_none());
1173        assert_eq!(perf.threads, 0);
1174        assert!(perf.parallel);
1175        assert_eq!(perf.cache_size, 100);
1176
1177        // Test ModuleConfig default
1178        let module = ModuleConfig::default();
1179        assert!(module.library_paths.is_empty());
1180        assert!(module.auto_load.is_empty());
1181        assert!(module.cache_dir.is_none());
1182
1183        // Test DebugConfig default
1184        let debug = DebugConfig::default();
1185        assert_eq!(debug.verbosity, 0);
1186        assert!(!debug.show_plans);
1187        assert!(!debug.show_timing);
1188        assert!(!debug.debug_mode);
1189        assert!(debug.log_file.is_none());
1190    }
1191
1192    #[test]
1193    fn test_config_new() {
1194        let config = Config::new();
1195        // Should be same as default
1196        assert_eq!(
1197            config.filter.lazy_evaluation,
1198            Config::default().filter.lazy_evaluation
1199        );
1200        assert_eq!(
1201            config.performance.batch_size,
1202            Config::default().performance.batch_size
1203        );
1204    }
1205
1206    #[test]
1207    fn test_parse_memory_limit() {
1208        assert_eq!(parse_memory_limit("1GB").unwrap(), 1024 * 1024 * 1024);
1209        assert_eq!(parse_memory_limit("500MB").unwrap(), 500 * 1024 * 1024);
1210        assert_eq!(parse_memory_limit("1024KB").unwrap(), 1024 * 1024);
1211        assert_eq!(parse_memory_limit("2048B").unwrap(), 2048);
1212        assert_eq!(parse_memory_limit("2048").unwrap(), 2048);
1213
1214        // Test case insensitivity
1215        assert_eq!(parse_memory_limit("1gb").unwrap(), 1024 * 1024 * 1024);
1216        assert_eq!(parse_memory_limit("500mb").unwrap(), 500 * 1024 * 1024);
1217
1218        // Test invalid formats
1219        assert!(parse_memory_limit("invalid").is_err());
1220        assert!(parse_memory_limit("1XB").is_err());
1221        assert!(parse_memory_limit("").is_err());
1222        assert!(parse_memory_limit("GB").is_err());
1223        assert!(parse_memory_limit("1.5GB").is_err()); // floats not supported
1224    }
1225
1226    #[test]
1227    fn test_config_validation() {
1228        let mut config = Config::default();
1229        assert!(validate_config(&config).is_ok());
1230
1231        // Test batch size validation
1232        config.performance.batch_size = 0;
1233        assert!(validate_config(&config).is_err());
1234
1235        // Test recursion depth validation
1236        config = Config::default();
1237        config.filter.max_recursion_depth = 0;
1238        assert!(validate_config(&config).is_err());
1239
1240        // Test CSV separator validation
1241        config = Config::default();
1242        config.formats.csv.separator = ",,".to_string();
1243        assert!(validate_config(&config).is_err());
1244
1245        config.formats.csv.separator = "".to_string();
1246        assert!(validate_config(&config).is_err());
1247
1248        // Test CSV quote validation
1249        config = Config::default();
1250        config.formats.csv.quote_char = "quote".to_string();
1251        assert!(validate_config(&config).is_err());
1252
1253        // Test thread count validation (should be ok up to high numbers)
1254        config = Config::default();
1255        config.performance.threads = 1024;
1256        assert!(validate_config(&config).is_ok());
1257
1258        config.performance.threads = 1025;
1259        assert!(validate_config(&config).is_err());
1260    }
1261
1262    #[test]
1263    fn test_find_config_file() {
1264        let temp_dir = TempDir::new().unwrap();
1265        let temp_path = temp_dir.path();
1266
1267        // Test no config files exist (no HOME set)
1268        assert!(Config::find_config_file_with_home(Some(temp_path), None).is_none());
1269
1270        // Create a config file in current directory
1271        fs::write(temp_path.join("dsq.toml"), "test").unwrap();
1272        assert_eq!(
1273            Config::find_config_file_with_home(Some(temp_path), None).unwrap(),
1274            temp_path.join("dsq.toml")
1275        );
1276
1277        // Test priority: current dir first
1278        fs::write(temp_path.join("dsq.yaml"), "test").unwrap();
1279        assert_eq!(
1280            Config::find_config_file_with_home(Some(temp_path), None).unwrap(),
1281            temp_path.join("dsq.toml")
1282        );
1283
1284        // Remove toml, should find yaml
1285        fs::remove_file(temp_path.join("dsq.toml")).unwrap();
1286        assert_eq!(
1287            Config::find_config_file_with_home(Some(temp_path), None).unwrap(),
1288            temp_path.join("dsq.yaml")
1289        );
1290
1291        // Test hidden files
1292        fs::write(temp_path.join(".dsq.toml"), "test").unwrap();
1293        assert_eq!(
1294            Config::find_config_file_with_home(Some(temp_path), None).unwrap(),
1295            temp_path.join(".dsq.toml")
1296        ); // hidden toml comes before yaml
1297
1298        // Remove yaml, should find hidden toml
1299        fs::remove_file(temp_path.join("dsq.yaml")).unwrap();
1300        assert_eq!(
1301            Config::find_config_file_with_home(Some(temp_path), None).unwrap(),
1302            temp_path.join(".dsq.toml")
1303        );
1304    }
1305
1306    #[test]
1307    fn test_merge_file_toml() {
1308        let temp_dir = TempDir::new().unwrap();
1309        let config_path = temp_dir.path().join("test.toml");
1310
1311        let toml_content = r#"
1312[filter]
1313lazy_evaluation = false
1314dataframe_optimizations = true
1315optimization_level = "advanced"
1316
1317[formats.csv]
1318separator = "|"
1319
1320[performance]
1321batch_size = 5000
1322"#;
1323
1324        fs::write(&config_path, toml_content).unwrap();
1325
1326        let mut config = Config::default();
1327        config.merge_file(&config_path).unwrap();
1328
1329        assert!(!config.filter.lazy_evaluation);
1330        assert_eq!(config.filter.optimization_level, "advanced");
1331        assert_eq!(config.formats.csv.separator, "|");
1332        assert_eq!(config.performance.batch_size, 5000);
1333    }
1334
1335    #[test]
1336    fn test_merge_file_yaml() {
1337        let temp_dir = TempDir::new().unwrap();
1338        let config_path = temp_dir.path().join("test.yaml");
1339
1340        let yaml_content = r#"
1341filter:
1342  lazy_evaluation: false
1343  dataframe_optimizations: true
1344  optimization_level: advanced
1345formats:
1346  csv:
1347    separator: "|"
1348performance:
1349  batch_size: 5000
1350"#;
1351
1352        fs::write(&config_path, yaml_content).unwrap();
1353
1354        let mut config = Config::default();
1355        config.merge_file(&config_path).unwrap();
1356
1357        assert!(!config.filter.lazy_evaluation);
1358        assert_eq!(config.filter.optimization_level, "advanced");
1359        assert_eq!(config.formats.csv.separator, "|");
1360        assert_eq!(config.performance.batch_size, 5000);
1361    }
1362
1363    #[test]
1364    fn test_merge_file_errors() {
1365        let temp_dir = TempDir::new().unwrap();
1366        let config_path = temp_dir.path().join("invalid.toml");
1367
1368        // Test invalid TOML
1369        fs::write(&config_path, "invalid toml content [").unwrap();
1370        let mut config = Config::default();
1371        assert!(config.merge_file(&config_path).is_err());
1372
1373        // Test unsupported extension
1374        let config_path = temp_dir.path().join("config.json");
1375        fs::write(&config_path, "{}").unwrap();
1376        assert!(config.merge_file(&config_path).is_err());
1377
1378        // Test non-existent file
1379        let config_path = temp_dir.path().join("nonexistent.toml");
1380        assert!(config.merge_file(&config_path).is_err());
1381    }
1382
1383    #[test]
1384    fn test_merge_env() {
1385        let mut config = Config::default();
1386
1387        // Use a mock environment reader to avoid interference with other tests
1388        let env_reader = |key: &str| match key {
1389            "DSQ_LAZY" => Some("false".to_string()),
1390            "DSQ_COLORS" => Some("true".to_string()),
1391            "DSQ_LIBRARY_PATH" => Some("/lib1:/lib2".to_string()),
1392            "DSQ_BATCH_SIZE" => Some("2500".to_string()),
1393            "DSQ_MEMORY_LIMIT" => Some("1GB".to_string()),
1394            "DSQ_THREADS" => Some("8".to_string()),
1395            "DSQ_DEBUG" => Some("true".to_string()),
1396            "DSQ_VERBOSITY" => Some("2".to_string()),
1397            _ => None,
1398        };
1399
1400        config.merge_env_with_reader(env_reader).unwrap();
1401
1402        assert!(!config.filter.lazy_evaluation);
1403        assert_eq!(config.display.color.enabled, Some(true));
1404        assert_eq!(
1405            config.modules.library_paths,
1406            vec![
1407                std::path::PathBuf::from("/lib1"),
1408                std::path::PathBuf::from("/lib2")
1409            ]
1410        );
1411        assert_eq!(config.performance.batch_size, 2500);
1412        assert_eq!(config.performance.memory_limit, Some(1024 * 1024 * 1024));
1413        assert_eq!(config.performance.threads, 8);
1414        assert!(config.debug.debug_mode);
1415        assert_eq!(config.debug.verbosity, 2);
1416    }
1417
1418    #[test]
1419    fn test_merge_env_invalid_values() {
1420        let mut config = Config::default();
1421
1422        // Use a mock environment reader with invalid values
1423        let env_reader = |key: &str| match key {
1424            "DSQ_MEMORY_LIMIT" => Some("invalid".to_string()),
1425            "DSQ_THREADS" => Some("invalid".to_string()),
1426            "DSQ_VERBOSITY" => Some("invalid".to_string()),
1427            _ => None,
1428        };
1429
1430        // Should not panic, should keep defaults
1431        config.merge_env_with_reader(env_reader).unwrap();
1432        assert_eq!(config.performance.batch_size, 10000); // default
1433        assert!(config.performance.memory_limit.is_none()); // default
1434        assert_eq!(config.performance.threads, 0); // default
1435        assert_eq!(config.debug.verbosity, 0); // default
1436    }
1437
1438    #[test]
1439    #[cfg(feature = "cli")]
1440    fn test_cli_override_comprehensive() {
1441        let mut config = Config::default();
1442        let mut cli_config = CliConfig {
1443            lazy: false,
1444            dataframe_optimizations: false,
1445            compact_output: true,
1446            raw_output: true,
1447            sort_keys: true,
1448            color_output: Some(true),
1449            csv_separator: Some("|".to_string()),
1450            csv_headers: Some(false),
1451            batch_size: Some(5000),
1452            memory_limit: Some("2GB".to_string()),
1453            library_path: vec![std::path::PathBuf::from("/lib")],
1454            verbose: 2,
1455            explain: true,
1456            ..Default::default()
1457        };
1458        cli_config
1459            .variables
1460            .insert("test".to_string(), serde_json::json!("value"));
1461
1462        config.apply_cli(&cli_config).unwrap();
1463
1464        assert!(!config.filter.lazy_evaluation);
1465        assert!(!config.filter.dataframe_optimizations);
1466        assert!(config.display.compact);
1467        assert!(config.display.raw_output);
1468        assert!(config.display.sort_keys);
1469        assert_eq!(config.display.color.enabled, Some(true));
1470        assert_eq!(config.formats.csv.separator, "|");
1471        assert!(!config.formats.csv.has_header);
1472        assert_eq!(config.performance.batch_size, 5000);
1473        assert_eq!(
1474            config.performance.memory_limit,
1475            Some(2 * 1024 * 1024 * 1024)
1476        );
1477        assert_eq!(
1478            config.modules.library_paths,
1479            vec![std::path::PathBuf::from("/lib")]
1480        );
1481        assert_eq!(config.debug.verbosity, 2);
1482        assert!(config.debug.show_plans);
1483        assert_eq!(config.variables["test"], serde_json::json!("value"));
1484    }
1485
1486    #[test]
1487    fn test_format_options_expanded() {
1488        let mut config = Config::default();
1489
1490        // Test CSV write options
1491        let csv_write = config.get_format_write_options(DataFormat::Csv);
1492        assert!(csv_write.include_header); // default
1493
1494        config.formats.csv.has_header = false;
1495        let csv_write = config.get_format_write_options(DataFormat::Csv);
1496        assert!(!csv_write.include_header);
1497
1498        // Test Parquet write options
1499        let parquet_write = config.get_format_write_options(DataFormat::Parquet);
1500        assert_eq!(parquet_write.compression, Some("snappy".to_string()));
1501
1502        config.formats.parquet.compression = "gzip".to_string();
1503        let parquet_write = config.get_format_write_options(DataFormat::Parquet);
1504        assert_eq!(parquet_write.compression, Some("gzip".to_string()));
1505
1506        // Test read options (currently defaults)
1507        let _csv_read = config.get_format_read_options(DataFormat::Csv);
1508        let _json_read = config.get_format_read_options(DataFormat::Json);
1509        let _parquet_read = config.get_format_read_options(DataFormat::Parquet);
1510    }
1511
1512    #[test]
1513    fn test_thread_count() {
1514        let mut config = Config::default();
1515
1516        // Auto-detect
1517        config.performance.threads = 0;
1518        assert!(config.get_thread_count() > 0);
1519
1520        // Explicit count
1521        config.performance.threads = 4;
1522        assert_eq!(config.get_thread_count(), 4);
1523    }
1524
1525    #[test]
1526    fn test_should_use_color() {
1527        let mut config = Config::default();
1528
1529        // Explicit true
1530        config.display.color.enabled = Some(true);
1531        assert!(config.should_use_color());
1532
1533        // Explicit false
1534        config.display.color.enabled = Some(false);
1535        assert!(!config.should_use_color());
1536
1537        // Auto-detect (None with auto_detect=true)
1538        config.display.color.enabled = None;
1539        config.display.color.auto_detect = true;
1540        // This depends on atty, but we can't easily test it in unit tests
1541        // The logic is there, so we'll assume it's working
1542    }
1543
1544    #[test]
1545    fn test_variables_conversion() {
1546        let mut config = Config::default();
1547        config
1548            .variables
1549            .insert("string".to_string(), serde_json::json!("hello"));
1550        config
1551            .variables
1552            .insert("number".to_string(), serde_json::json!(42));
1553        config
1554            .variables
1555            .insert("array".to_string(), serde_json::json!([1, 2, 3]));
1556
1557        let converted = config.get_variables_as_value();
1558        assert_eq!(converted.len(), 3);
1559        assert_eq!(
1560            converted["string"],
1561            dsq_core::Value::String("hello".to_string())
1562        );
1563        assert_eq!(converted["number"], dsq_core::Value::Int(42));
1564        // Array conversion would depend on dsq_core::Value implementation
1565    }
1566
1567    #[test]
1568    fn test_save_toml() {
1569        let temp_dir = TempDir::new().unwrap();
1570        let config_path = temp_dir.path().join("saved.toml");
1571
1572        let mut config = Config::default();
1573        config.filter.lazy_evaluation = false;
1574        config.performance.batch_size = 1234;
1575
1576        config.save(&config_path).unwrap();
1577
1578        // Read back and verify
1579        let content = fs::read_to_string(&config_path).unwrap();
1580        assert!(content.contains("lazy_evaluation = false"));
1581        assert!(content.contains("batch_size = 1234"));
1582    }
1583
1584    #[test]
1585    fn test_save_yaml() {
1586        let temp_dir = TempDir::new().unwrap();
1587        let config_path = temp_dir.path().join("saved.yaml");
1588
1589        let mut config = Config::default();
1590        config.filter.lazy_evaluation = false;
1591        config.performance.batch_size = 1234;
1592
1593        config.save(&config_path).unwrap();
1594
1595        // Read back and verify
1596        let content = fs::read_to_string(&config_path).unwrap();
1597        assert!(content.contains("lazy_evaluation: false"));
1598        assert!(content.contains("batch_size: 1234"));
1599    }
1600
1601    #[test]
1602    fn test_save_errors() {
1603        let config = Config::default();
1604
1605        // Invalid extension
1606        let temp_dir = TempDir::new().unwrap();
1607        let config_path = temp_dir.path().join("config.json");
1608        assert!(config.save(&config_path).is_err());
1609
1610        // Non-existent directory
1611        let config_path = std::path::PathBuf::from("/nonexistent/dir/config.toml");
1612        assert!(config.save(&config_path).is_err());
1613    }
1614
1615    #[test]
1616    fn test_create_default_config_file() {
1617        let temp_dir = TempDir::new().unwrap();
1618        let config_path = temp_dir.path().join("default.toml");
1619
1620        create_default_config_file(&config_path).unwrap();
1621
1622        // Verify file was created and contains default config
1623        assert!(config_path.exists());
1624        let content = fs::read_to_string(&config_path).unwrap();
1625        assert!(content.contains("[filter]"));
1626        assert!(content.contains("lazy_evaluation = true"));
1627    }
1628
1629    #[test]
1630    fn test_config_load() {
1631        let temp_dir = TempDir::new().unwrap();
1632        let temp_path = temp_dir.path();
1633
1634        let toml_content = r#"
1635[filter]
1636lazy_evaluation = false
1637dataframe_optimizations = true
1638[performance]
1639batch_size = 7777
1640"#;
1641
1642        fs::create_dir_all(temp_path.join(".config").join("dsq")).unwrap();
1643        fs::write(
1644            temp_path.join(".config").join("dsq").join("dsq.toml"),
1645            toml_content,
1646        )
1647        .unwrap();
1648
1649        // Use mock environment reader to avoid environment variable leakage
1650        let temp_path_str = temp_path.to_str().unwrap().to_string();
1651        let env_reader = move |key: &str| match key {
1652            "HOME" => Some(temp_path_str.clone()),
1653            _ => None, // No other DSQ_* env vars set
1654        };
1655
1656        let config = Config::load_with_env_reader(env_reader).unwrap();
1657
1658        assert!(!config.filter.lazy_evaluation);
1659        assert_eq!(config.performance.batch_size, 7777);
1660        assert!(!config.debug.debug_mode); // default
1661    }
1662
1663    #[test]
1664    fn test_config_load_from_file() {
1665        let temp_dir = TempDir::new().unwrap();
1666        let config_path = temp_dir.path().join("test.toml");
1667
1668        let toml_content = r#"
1669[formats.csv]
1670separator = ";"
1671has_header = true
1672[debug]
1673verbosity = 3
1674"#;
1675        fs::write(&config_path, toml_content).unwrap();
1676
1677        let config = Config::load_from_file(&config_path).unwrap();
1678
1679        assert_eq!(config.formats.csv.separator, ";");
1680        assert_eq!(config.debug.verbosity, 3);
1681        // Other fields should be defaults
1682        assert!(config.filter.lazy_evaluation);
1683    }
1684
1685    #[test]
1686    fn test_to_read_options() {
1687        let config = Config::default();
1688        let opts = config.to_read_options();
1689
1690        // Currently returns defaults
1691        assert!(opts.infer_schema);
1692        assert!(opts.n_rows.is_none());
1693        assert_eq!(opts.skip_rows, 0);
1694    }
1695
1696    #[test]
1697    fn test_to_write_options() {
1698        let config = Config::default();
1699        let opts = config.to_write_options();
1700
1701        assert!(opts.include_header);
1702        assert!(opts.compression.is_none());
1703    }
1704
1705    #[test]
1706    fn test_to_executor_config() {
1707        let mut config = Config::default();
1708        config
1709            .variables
1710            .insert("test".to_string(), serde_json::json!("value"));
1711        config.filter.max_recursion_depth = 500;
1712
1713        let exec_config = config.to_executor_config();
1714
1715        assert_eq!(exec_config.timeout_ms, Some(300000)); // 300 seconds * 1000
1716        assert_eq!(exec_config.error_mode, dsq_filter::ErrorMode::Strict);
1717        assert!(!exec_config.collect_stats);
1718        assert_eq!(exec_config.max_recursion_depth, 500);
1719        assert!(!exec_config.debug_mode);
1720        assert_eq!(exec_config.batch_size, 10000);
1721        assert!(exec_config.variables.contains_key("test"));
1722    }
1723
1724    #[test]
1725    fn test_serialization_round_trip_toml() {
1726        let mut config = Config::default();
1727        config.filter.lazy_evaluation = false;
1728        config.performance.batch_size = 12345;
1729        config.formats.csv.separator = "|".to_string();
1730        config
1731            .variables
1732            .insert("key".to_string(), serde_json::json!({"nested": "value"}));
1733
1734        let temp_dir = TempDir::new().unwrap();
1735        let config_path = temp_dir.path().join("round_trip.toml");
1736
1737        // Save
1738        config.save(&config_path).unwrap();
1739
1740        // Load back
1741        let loaded_config = Config::load_from_file(&config_path).unwrap();
1742
1743        // Verify
1744        assert_eq!(
1745            config.filter.lazy_evaluation,
1746            loaded_config.filter.lazy_evaluation
1747        );
1748        assert_eq!(
1749            config.performance.batch_size,
1750            loaded_config.performance.batch_size
1751        );
1752        assert_eq!(
1753            config.formats.csv.separator,
1754            loaded_config.formats.csv.separator
1755        );
1756        assert_eq!(config.variables, loaded_config.variables);
1757    }
1758
1759    #[test]
1760    fn test_serialization_round_trip_yaml() {
1761        let mut config = Config::default();
1762        config.filter.lazy_evaluation = false;
1763        config.performance.batch_size = 12345;
1764        config.formats.csv.separator = "|".to_string();
1765        config
1766            .variables
1767            .insert("key".to_string(), serde_json::json!({"nested": "value"}));
1768
1769        let temp_dir = TempDir::new().unwrap();
1770        let config_path = temp_dir.path().join("round_trip.yaml");
1771
1772        // Save
1773        config.save(&config_path).unwrap();
1774
1775        // Load back
1776        let loaded_config = Config::load_from_file(&config_path).unwrap();
1777
1778        // Verify
1779        assert_eq!(
1780            config.filter.lazy_evaluation,
1781            loaded_config.filter.lazy_evaluation
1782        );
1783        assert_eq!(
1784            config.performance.batch_size,
1785            loaded_config.performance.batch_size
1786        );
1787        assert_eq!(
1788            config.formats.csv.separator,
1789            loaded_config.formats.csv.separator
1790        );
1791        assert_eq!(config.variables, loaded_config.variables);
1792    }
1793
1794    #[test]
1795    fn test_merge_comprehensive() {
1796        let mut base = Config::default();
1797        base.filter.lazy_evaluation = true;
1798        base.performance.batch_size = 1000;
1799
1800        let other = Config {
1801            filter: FilterConfig {
1802                lazy_evaluation: false,
1803                dataframe_optimizations: false,
1804                optimization_level: "advanced".to_string(),
1805                max_recursion_depth: 2000,
1806                max_execution_time: Some(600),
1807                collect_stats: true,
1808                error_mode: "collect".to_string(),
1809            },
1810            performance: PerformanceConfig {
1811                batch_size: 2000,
1812                memory_limit: Some(1024 * 1024 * 1024),
1813                threads: 8,
1814                parallel: false,
1815                cache_size: 200,
1816            },
1817            formats: FormatConfigs {
1818                csv: CsvConfig {
1819                    separator: ";".to_string(),
1820                    has_header: false,
1821                    ..Default::default()
1822                },
1823                ..Default::default()
1824            },
1825            display: DisplayConfig {
1826                color: ColorConfig {
1827                    enabled: Some(true),
1828                    scheme: "dark".to_string(),
1829                    auto_detect: false,
1830                },
1831                compact: true,
1832                ..Default::default()
1833            },
1834            modules: ModuleConfig {
1835                library_paths: vec![std::path::PathBuf::from("/test")],
1836                auto_load: vec!["test".to_string()],
1837                cache_dir: Some(std::path::PathBuf::from("/cache")),
1838            },
1839            debug: DebugConfig {
1840                verbosity: 3,
1841                show_plans: true,
1842                show_timing: true,
1843                debug_mode: true,
1844                log_file: Some(std::path::PathBuf::from("/log")),
1845            },
1846            variables: {
1847                let mut vars = std::collections::HashMap::new();
1848                vars.insert("var1".to_string(), serde_json::json!("value1"));
1849                vars
1850            },
1851            ..Default::default()
1852        };
1853
1854        base.merge(other);
1855
1856        // Check merged values
1857        assert!(!base.filter.lazy_evaluation); // changed
1858        assert!(!base.filter.dataframe_optimizations); // changed
1859        assert_eq!(base.filter.optimization_level, "advanced");
1860        assert_eq!(base.filter.max_recursion_depth, 2000);
1861        assert_eq!(base.filter.max_execution_time, Some(600));
1862        assert!(base.filter.collect_stats);
1863        assert_eq!(base.filter.error_mode, "collect");
1864
1865        assert_eq!(base.performance.batch_size, 2000);
1866        assert_eq!(base.performance.memory_limit, Some(1024 * 1024 * 1024));
1867        assert_eq!(base.performance.threads, 8);
1868        assert!(!base.performance.parallel);
1869        assert_eq!(base.performance.cache_size, 200);
1870
1871        assert_eq!(base.formats.csv.separator, ";");
1872        assert!(!base.formats.csv.has_header);
1873
1874        assert_eq!(base.display.color.enabled, Some(true));
1875        assert_eq!(base.display.color.scheme, "dark");
1876        assert!(!base.display.color.auto_detect);
1877        assert!(base.display.compact);
1878
1879        assert_eq!(
1880            base.modules.library_paths,
1881            vec![std::path::PathBuf::from("/test")]
1882        );
1883        assert_eq!(base.modules.auto_load, vec!["test".to_string()]);
1884        assert_eq!(
1885            base.modules.cache_dir,
1886            Some(std::path::PathBuf::from("/cache"))
1887        );
1888
1889        assert_eq!(base.debug.verbosity, 3);
1890        assert!(base.debug.show_plans);
1891        assert!(base.debug.show_timing);
1892        assert!(base.debug.debug_mode);
1893        assert_eq!(base.debug.log_file, Some(std::path::PathBuf::from("/log")));
1894
1895        assert_eq!(base.variables["var1"], serde_json::json!("value1"));
1896    }
1897
1898    #[test]
1899    fn test_merge_partial_override() {
1900        let mut base = Config::default();
1901        base.filter.lazy_evaluation = true;
1902        base.performance.batch_size = 1000;
1903
1904        let other = Config {
1905            filter: FilterConfig {
1906                lazy_evaluation: false, // should override
1907                ..Default::default()
1908            },
1909            performance: PerformanceConfig {
1910                batch_size: PerformanceConfig::default().batch_size, // should not override since == default
1911                ..Default::default()
1912            },
1913            ..Default::default()
1914        };
1915
1916        base.merge(other);
1917
1918        assert!(!base.filter.lazy_evaluation); // overridden
1919        assert_eq!(base.performance.batch_size, 1000); // not overridden
1920    }
1921
1922    #[test]
1923    fn test_parse_memory_limit_edge_cases() {
1924        // Test large numbers (use reasonable size)
1925        assert_eq!(
1926            parse_memory_limit("1000GB").unwrap(),
1927            1000usize * 1024 * 1024 * 1024
1928        );
1929
1930        // Test zero
1931        assert_eq!(parse_memory_limit("0").unwrap(), 0);
1932
1933        // Test fractional (should fail)
1934        assert!(parse_memory_limit("1.5GB").is_err());
1935
1936        // Test negative (should fail as usize)
1937        assert!(parse_memory_limit("-1GB").is_err());
1938
1939        // Test mixed case
1940        assert_eq!(parse_memory_limit("1Gb").unwrap(), 1024 * 1024 * 1024);
1941        assert_eq!(parse_memory_limit("1gB").unwrap(), 1024 * 1024 * 1024);
1942
1943        // Test whitespace
1944        assert!(parse_memory_limit(" 1GB ").is_err()); // no trim
1945
1946        // Test empty string
1947        assert!(parse_memory_limit("").is_err());
1948    }
1949
1950    #[test]
1951    fn test_validate_config_edge_cases() {
1952        let mut config = Config::default();
1953
1954        // Valid config
1955        assert!(validate_config(&config).is_ok());
1956
1957        // Test high thread count
1958        config.performance.threads = 1024;
1959        assert!(validate_config(&config).is_ok());
1960        config.performance.threads = 1025;
1961        assert!(validate_config(&config).is_err());
1962
1963        // Test empty CSV separator
1964        config = Config::default();
1965        config.formats.csv.separator = "".to_string();
1966        assert!(validate_config(&config).is_err());
1967
1968        // Test multi-char CSV separator
1969        config.formats.csv.separator = ",;".to_string();
1970        assert!(validate_config(&config).is_err());
1971
1972        // Test empty quote char
1973        config = Config::default();
1974        config.formats.csv.quote_char = "".to_string();
1975        assert!(validate_config(&config).is_err());
1976
1977        // Test multi-char quote
1978        config.formats.csv.quote_char = "\"\"".to_string();
1979        assert!(validate_config(&config).is_err());
1980
1981        // Test zero batch size
1982        config = Config::default();
1983        config.performance.batch_size = 0;
1984        assert!(validate_config(&config).is_err());
1985
1986        // Test zero recursion depth
1987        config = Config::default();
1988        config.filter.max_recursion_depth = 0;
1989        assert!(validate_config(&config).is_err());
1990    }
1991
1992    #[test]
1993    fn test_find_config_file_with_home_edge_cases() {
1994        let temp_dir = TempDir::new().unwrap();
1995        let temp_path = temp_dir.path();
1996
1997        // Test with non-existent home
1998        assert!(
1999            Config::find_config_file_with_home(Some(temp_path), Some("/nonexistent")).is_none()
2000        );
2001
2002        // Test with empty home
2003        assert!(Config::find_config_file_with_home(Some(temp_path), Some("")).is_none());
2004
2005        // Test with home that exists but no config
2006        assert!(Config::find_config_file_with_home(Some(temp_path), temp_path.to_str()).is_none());
2007
2008        // Test priority: current dir over home
2009        fs::write(temp_path.join("dsq.toml"), "current").unwrap();
2010        let home_dir = TempDir::new().unwrap();
2011        fs::create_dir_all(home_dir.path().join(".config").join("dsq")).unwrap();
2012        fs::write(
2013            home_dir.path().join(".config").join("dsq").join("dsq.toml"),
2014            "home",
2015        )
2016        .unwrap();
2017
2018        let found = Config::find_config_file_with_home(Some(temp_path), home_dir.path().to_str());
2019        assert_eq!(found.unwrap(), temp_path.join("dsq.toml"));
2020
2021        // Remove current, should find home
2022        fs::remove_file(temp_path.join("dsq.toml")).unwrap();
2023        let found = Config::find_config_file_with_home(Some(temp_path), home_dir.path().to_str());
2024        assert_eq!(
2025            found.unwrap(),
2026            home_dir.path().join(".config").join("dsq").join("dsq.toml")
2027        );
2028    }
2029
2030    #[test]
2031    fn test_get_format_read_options() {
2032        let config = Config::default();
2033
2034        // Test CSV
2035        let csv_opts = config.get_format_read_options(DataFormat::Csv);
2036        assert!(csv_opts.infer_schema);
2037
2038        // Test JSON
2039        let json_opts = config.get_format_read_options(DataFormat::Json);
2040        assert!(json_opts.infer_schema);
2041
2042        // Test Parquet
2043        let parquet_opts = config.get_format_read_options(DataFormat::Parquet);
2044        assert!(!parquet_opts.infer_schema);
2045
2046        // Test other formats (should use defaults)
2047        let other_opts = config.get_format_read_options(DataFormat::JsonLines);
2048        assert!(other_opts.infer_schema);
2049    }
2050
2051    #[test]
2052    fn test_get_format_write_options() {
2053        let mut config = Config::default();
2054
2055        // Test CSV
2056        let csv_opts = config.get_format_write_options(DataFormat::Csv);
2057        assert!(csv_opts.include_header);
2058
2059        config.formats.csv.has_header = false;
2060        let csv_opts = config.get_format_write_options(DataFormat::Csv);
2061        assert!(!csv_opts.include_header);
2062
2063        // Test JSON
2064        let json_opts = config.get_format_write_options(DataFormat::Json);
2065        assert!(!json_opts.include_header);
2066
2067        // Test Parquet
2068        let parquet_opts = config.get_format_write_options(DataFormat::Parquet);
2069        assert!(!parquet_opts.include_header);
2070        assert_eq!(parquet_opts.compression, Some("snappy".to_string()));
2071
2072        config.formats.parquet.compression = "gzip".to_string();
2073        let parquet_opts = config.get_format_write_options(DataFormat::Parquet);
2074        assert_eq!(parquet_opts.compression, Some("gzip".to_string()));
2075    }
2076
2077    #[test]
2078    fn test_variables_conversion_complex() {
2079        let mut config = Config::default();
2080
2081        // Add various JSON types
2082        config
2083            .variables
2084            .insert("string".to_string(), serde_json::json!("hello"));
2085        config
2086            .variables
2087            .insert("number".to_string(), serde_json::json!(42.5));
2088        config
2089            .variables
2090            .insert("integer".to_string(), serde_json::json!(42));
2091        config
2092            .variables
2093            .insert("boolean".to_string(), serde_json::json!(true));
2094        config
2095            .variables
2096            .insert("null".to_string(), serde_json::json!(null));
2097        config
2098            .variables
2099            .insert("array".to_string(), serde_json::json!([1, 2, 3]));
2100        config
2101            .variables
2102            .insert("object".to_string(), serde_json::json!({"key": "value"}));
2103
2104        let converted = config.get_variables_as_value();
2105
2106        assert_eq!(converted.len(), 7);
2107        assert_eq!(
2108            converted["string"],
2109            dsq_core::Value::String("hello".to_string())
2110        );
2111        assert_eq!(converted["number"], dsq_core::Value::Float(42.5));
2112        assert_eq!(converted["integer"], dsq_core::Value::Int(42));
2113        assert_eq!(converted["boolean"], dsq_core::Value::Bool(true));
2114        // Null, array, object conversion depends on dsq_core::Value implementation
2115    }
2116
2117    #[test]
2118    fn test_config_load_with_env_and_file() {
2119        let temp_dir = TempDir::new().unwrap();
2120        let temp_path = temp_dir.path();
2121
2122        // Create config file
2123        let toml_content = r#"
2124[filter]
2125lazy_evaluation = false
2126[performance]
2127batch_size = 5000
2128"#;
2129        fs::create_dir_all(temp_path.join(".config").join("dsq")).unwrap();
2130        fs::write(
2131            temp_path.join(".config").join("dsq").join("dsq.toml"),
2132            toml_content,
2133        )
2134        .unwrap();
2135
2136        // Mock env reader with HOME and some DSQ vars
2137        let temp_path_str = temp_path.to_str().unwrap().to_string();
2138        let env_reader = move |key: &str| match key {
2139            "HOME" => Some(temp_path_str.clone()),
2140            "DSQ_DEBUG" => Some("true".to_string()),
2141            "DSQ_COLORS" => Some("false".to_string()),
2142            _ => None,
2143        };
2144
2145        let config = Config::load_with_env_reader(env_reader).unwrap();
2146
2147        // File settings
2148        assert!(!config.filter.lazy_evaluation);
2149        assert_eq!(config.performance.batch_size, 5000);
2150
2151        // Env settings
2152        assert!(config.debug.debug_mode);
2153        assert_eq!(config.display.color.enabled, Some(false));
2154
2155        // Defaults
2156        assert!(config.filter.dataframe_optimizations);
2157    }
2158
2159    #[test]
2160    fn test_apply_cli_edge_cases() {
2161        #[cfg(feature = "cli")]
2162        {
2163            let mut config = Config::default();
2164            let mut cli_config = CliConfig {
2165                input_format: Some(DataFormat::Csv),
2166                output_format: Some(DataFormat::Json),
2167                limit: Some(1000),
2168                lazy: false,
2169                dataframe_optimizations: false,
2170                compact_output: true,
2171                raw_output: true,
2172                sort_keys: true,
2173                exit_status: true,
2174                color_output: None,
2175                csv_separator: Some("|".to_string()),
2176                csv_headers: Some(false),
2177                batch_size: Some(9999),
2178                memory_limit: Some("512MB".to_string()),
2179                library_path: vec![std::path::PathBuf::from("/cli/lib")],
2180                verbose: 5,
2181                explain: true,
2182                ..Default::default()
2183            };
2184            cli_config
2185                .variables
2186                .insert("cli_var".to_string(), serde_json::json!("cli_value"));
2187
2188            config.apply_cli(&cli_config).unwrap();
2189
2190            assert_eq!(config.io.default_input_format, Some(DataFormat::Csv));
2191            assert_eq!(config.io.default_output_format, Some(DataFormat::Json));
2192            assert_eq!(config.io.limit, Some(1000));
2193            assert!(!config.filter.lazy_evaluation);
2194            assert!(!config.filter.dataframe_optimizations);
2195            assert!(config.display.compact);
2196            assert!(config.display.raw_output);
2197            assert!(config.display.sort_keys);
2198            assert!(config.display.exit_status);
2199            assert_eq!(config.formats.csv.separator, "|");
2200            assert!(!config.formats.csv.has_header);
2201            assert_eq!(config.performance.batch_size, 9999);
2202            assert_eq!(config.performance.memory_limit, Some(512 * 1024 * 1024));
2203            assert_eq!(
2204                config.modules.library_paths,
2205                vec![std::path::PathBuf::from("/cli/lib")]
2206            );
2207            assert_eq!(config.debug.verbosity, 5);
2208            assert!(config.debug.show_plans);
2209            assert_eq!(config.variables["cli_var"], serde_json::json!("cli_value"));
2210        }
2211    }
2212
2213    #[test]
2214    fn test_merge_csv_config() {
2215        let mut config = Config::default();
2216        let other = CsvConfig {
2217            separator: ";".to_string(),
2218            has_header: false,
2219            quote_char: "'".to_string(),
2220            comment_char: Some("#".to_string()),
2221            null_values: vec!["N/A".to_string()],
2222            trim_whitespace: true,
2223            infer_schema_length: 2000,
2224        };
2225
2226        config.merge_csv_config(other);
2227
2228        assert_eq!(config.formats.csv.separator, ";");
2229        assert!(!config.formats.csv.has_header);
2230        assert_eq!(config.formats.csv.quote_char, "'");
2231        assert_eq!(config.formats.csv.comment_char, Some("#".to_string()));
2232        assert_eq!(config.formats.csv.null_values, vec!["N/A".to_string()]);
2233        assert!(config.formats.csv.trim_whitespace);
2234        assert_eq!(config.formats.csv.infer_schema_length, 2000);
2235    }
2236
2237    #[test]
2238    fn test_merge_json_config() {
2239        let mut config = Config::default();
2240        let other = JsonConfig {
2241            pretty_print: false,
2242            maintain_order: true,
2243            escape_unicode: true,
2244            flatten: true,
2245            flatten_separator: "/".to_string(),
2246        };
2247
2248        config.merge_json_config(other);
2249
2250        assert!(!config.formats.json.pretty_print);
2251        assert!(config.formats.json.maintain_order);
2252        assert!(config.formats.json.escape_unicode);
2253        assert!(config.formats.json.flatten);
2254        assert_eq!(config.formats.json.flatten_separator, "/");
2255    }
2256
2257    #[test]
2258    fn test_merge_parquet_config() {
2259        let mut config = Config::default();
2260        let other = ParquetConfig {
2261            compression: "gzip".to_string(),
2262            write_statistics: false,
2263            row_group_size: 2000000,
2264            data_page_size: 1000000,
2265        };
2266
2267        config.merge_parquet_config(other);
2268
2269        assert_eq!(config.formats.parquet.compression, "gzip");
2270        assert!(!config.formats.parquet.write_statistics);
2271        assert_eq!(config.formats.parquet.row_group_size, 2000000);
2272        assert_eq!(config.formats.parquet.data_page_size, 1000000);
2273    }
2274
2275    #[test]
2276    fn test_merge_display_config() {
2277        let mut config = Config::default();
2278        let other = DisplayConfig {
2279            color: ColorConfig {
2280                enabled: Some(false),
2281                scheme: "light".to_string(),
2282                auto_detect: false,
2283            },
2284            compact: true,
2285            sort_keys: true,
2286            raw_output: true,
2287            exit_status: true,
2288            number_format: NumberFormatConfig {
2289                float_precision: Some(2),
2290                scientific_notation: true,
2291                scientific_threshold: 1000.0,
2292            },
2293            datetime_format: DateTimeFormatConfig {
2294                date_format: "%d/%m/%Y".to_string(),
2295                datetime_format: "%d/%m/%Y %H:%M:%S".to_string(),
2296                time_format: "%H:%M".to_string(),
2297                timezone: "UTC".to_string(),
2298            },
2299        };
2300
2301        config.merge_display_config(other);
2302
2303        assert_eq!(config.display.color.enabled, Some(false));
2304        assert_eq!(config.display.color.scheme, "light");
2305        assert!(!config.display.color.auto_detect);
2306        assert!(config.display.compact);
2307        assert!(config.display.sort_keys);
2308        assert!(config.display.raw_output);
2309        assert!(config.display.exit_status);
2310        assert_eq!(config.display.number_format.float_precision, Some(2));
2311        assert!(config.display.number_format.scientific_notation);
2312        assert_eq!(config.display.number_format.scientific_threshold, 1000.0);
2313        assert_eq!(config.display.datetime_format.date_format, "%d/%m/%Y");
2314        assert_eq!(
2315            config.display.datetime_format.datetime_format,
2316            "%d/%m/%Y %H:%M:%S"
2317        );
2318        assert_eq!(config.display.datetime_format.time_format, "%H:%M");
2319        assert_eq!(config.display.datetime_format.timezone, "UTC");
2320    }
2321}