Skip to main content

robin_sparkless_core/
config.rs

//! Configuration for robin-sparkless sessions.
//!
//! Use [`SparklessConfig`] to configure a session from code or environment variables,
//! then create a session with `SparkSession::from_config` (from the main robin-sparkless crate).
5
6use std::collections::HashMap;
7use std::path::PathBuf;
8
/// Configuration for building a SparkSession.
///
/// Can be constructed manually (every field is public and the type implements
/// [`Default`]) or from environment variables via [`SparklessConfig::from_env`].
#[derive(Clone, Debug, Default)]
pub struct SparklessConfig {
    /// Directory for disk-backed tables (saveAsTable). Maps to `spark.sql.warehouse.dir`.
    pub warehouse_dir: Option<PathBuf>,
    /// Optional temp directory for intermediate data. Reserved for future use;
    /// it is not emitted by [`SparklessConfig::to_session_config`].
    pub temp_dir: Option<PathBuf>,
    /// When true, column names are case-sensitive. Default is false (PySpark default).
    /// Maps to `spark.sql.caseSensitive`.
    pub case_sensitive: bool,
    /// Extra Spark-style config key-value pairs (e.g. `spark.app.name`, `spark.executor.memory`).
    pub extra: HashMap<String, String>,
}

impl SparklessConfig {
    /// Build config from environment variables.
    ///
    /// - `ROBIN_SPARKLESS_WAREHOUSE_DIR` → `warehouse_dir` (ignored when unset or empty)
    /// - `ROBIN_SPARKLESS_TEMP_DIR` → `temp_dir` (ignored when unset or empty)
    /// - `ROBIN_SPARKLESS_CASE_SENSITIVE` → `case_sensitive`; `true` when the value is
    ///   `"true"` (ASCII case-insensitive) or exactly `"1"`, otherwise `false`
    /// - `ROBIN_SPARKLESS_CONFIG_*` → entries in `extra`; the key is the suffix after the
    ///   prefix with underscores converted to dots. A suffix written without any lowercase
    ///   letters is lower-cased (`ROBIN_SPARKLESS_CONFIG_SPARK_APP_NAME` → `spark.app.name`);
    ///   a suffix that already contains lowercase letters is kept verbatim so camelCase keys
    ///   can be expressed (`ROBIN_SPARKLESS_CONFIG_spark_sql_caseSensitive` →
    ///   `spark.sql.caseSensitive`). Entries with an empty key or empty value are skipped.
    ///   If two variables map to the same key, which one wins is unspecified.
    ///
    /// This function never panics on non-Unicode environment entries: directory
    /// variables are taken as raw OS strings, and `ROBIN_SPARKLESS_CONFIG_*` entries whose
    /// name or value is not valid Unicode are skipped.
    pub fn from_env() -> Self {
        const CONFIG_PREFIX: &str = "ROBIN_SPARKLESS_CONFIG_";

        let warehouse_dir = Self::path_from_env("ROBIN_SPARKLESS_WAREHOUSE_DIR");
        let temp_dir = Self::path_from_env("ROBIN_SPARKLESS_TEMP_DIR");
        // A non-Unicode value makes `env::var` return `Err`, which falls back to `false`.
        let case_sensitive = std::env::var("ROBIN_SPARKLESS_CASE_SENSITIVE")
            .map(|v| v.eq_ignore_ascii_case("true") || v == "1")
            .unwrap_or(false);

        // `vars_os` instead of `vars`: the latter panics as soon as *any* variable in the
        // process environment is not valid Unicode, even one unrelated to this crate.
        let extra: HashMap<String, String> = std::env::vars_os()
            .filter_map(|(name, value)| {
                let name = name.into_string().ok()?;
                let key = Self::extra_key_from_env_suffix(name.strip_prefix(CONFIG_PREFIX)?);
                let value = value.into_string().ok()?;
                if key.is_empty() || value.is_empty() {
                    None
                } else {
                    Some((key, value))
                }
            })
            .collect();

        SparklessConfig {
            warehouse_dir,
            temp_dir,
            case_sensitive,
            extra,
        }
    }

    /// Convert to the session config map (spark.sql.warehouse.dir, spark.sql.caseSensitive, plus extra).
    ///
    /// Starts from a copy of `extra`, then:
    /// - sets `spark.sql.warehouse.dir` when `warehouse_dir` is present and valid UTF-8
    ///   (a non-UTF-8 path is left out, so any value supplied through `extra` is kept);
    /// - always sets `spark.sql.caseSensitive` to `"true"` or `"false"`, overriding any
    ///   value supplied through `extra`.
    pub fn to_session_config(&self) -> HashMap<String, String> {
        let mut m = self.extra.clone();
        if let Some(dir) = self.warehouse_dir.as_ref().and_then(|d| d.to_str()) {
            m.insert("spark.sql.warehouse.dir".to_string(), dir.to_string());
        }
        m.insert(
            "spark.sql.caseSensitive".to_string(),
            self.case_sensitive.to_string(),
        );
        m
    }

    /// Read a directory path from environment variable `name`, treating unset and empty as absent.
    fn path_from_env(name: &str) -> Option<PathBuf> {
        // `var_os` keeps paths that are not valid UTF-8, which `PathBuf` can represent.
        std::env::var_os(name)
            .filter(|raw| !raw.is_empty())
            .map(PathBuf::from)
    }

    /// Turn the part of a `ROBIN_SPARKLESS_CONFIG_*` variable name after the prefix into a config key.
    fn extra_key_from_env_suffix(suffix: &str) -> String {
        let key = suffix.replace('_', ".");
        // Conventional all-caps variable names map to the lowercase Spark key; names that
        // already carry lowercase letters are assumed to spell the key's exact case.
        if key.bytes().any(|b| b.is_ascii_lowercase()) {
            key
        } else {
            key.to_ascii_lowercase()
        }
    }
}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// A default-constructed config has no directories, is case-insensitive,
    /// and carries no extra entries.
    #[test]
    fn config_default() {
        let cfg = SparklessConfig::default();
        assert!(cfg.warehouse_dir.is_none());
        assert!(cfg.temp_dir.is_none());
        assert!(!cfg.case_sensitive);
        assert!(cfg.extra.is_empty());
    }

    /// With nothing configured, only the case-sensitivity flag is emitted.
    #[test]
    fn to_session_config_empty() {
        let session = SparklessConfig::default().to_session_config();
        assert_eq!(
            session.get("spark.sql.caseSensitive").map(String::as_str),
            Some("false")
        );
        assert!(!session.contains_key("spark.sql.warehouse.dir"));
    }

    /// The warehouse directory and the case-sensitivity flag both reach the session map.
    #[test]
    fn to_session_config_warehouse_and_case_sensitive() {
        let mut cfg = SparklessConfig::default();
        cfg.warehouse_dir = Some(PathBuf::from("/tmp/wh"));
        cfg.case_sensitive = true;

        let session = cfg.to_session_config();
        assert_eq!(
            session.get("spark.sql.warehouse.dir").map(String::as_str),
            Some("/tmp/wh")
        );
        assert_eq!(
            session.get("spark.sql.caseSensitive").map(String::as_str),
            Some("true")
        );
    }

    /// Entries supplied through `extra` are carried over next to the built-in keys.
    #[test]
    fn to_session_config_extra_preserved() {
        let cfg = SparklessConfig {
            extra: std::iter::once(("spark.app.name".to_string(), "MyApp".to_string())).collect(),
            ..Default::default()
        };

        let session = cfg.to_session_config();
        assert_eq!(
            session.get("spark.app.name").map(String::as_str),
            Some("MyApp")
        );
        assert_eq!(
            session.get("spark.sql.caseSensitive").map(String::as_str),
            Some("false")
        );
    }
}