Skip to main content

robin_sparkless_core/
config.rs

1//! Configuration for robin-sparkless sessions.
2//!
3//! Use [`SparklessConfig`] to configure a session from code or environment variables,
4//! then create a session with `SparkSession::from_config` (from the main robin-sparkless crate).
5
6use std::collections::HashMap;
7use std::path::PathBuf;
8
/// Configuration for building a SparkSession.
///
/// Can be constructed manually (all fields are public and the type implements
/// [`Default`]) or from environment variables via [`SparklessConfig::from_env`].
#[derive(Clone, Debug, Default)]
pub struct SparklessConfig {
    /// Directory for disk-backed tables (saveAsTable). Maps to `spark.sql.warehouse.dir`.
    pub warehouse_dir: Option<PathBuf>,
    /// Optional temp directory for intermediate data. Reserved for future use.
    pub temp_dir: Option<PathBuf>,
    /// When true, column names are case-sensitive. Default is false (PySpark default).
    /// Maps to `spark.sql.caseSensitive`.
    pub case_sensitive: bool,
    /// Extra Spark-style config key-value pairs (e.g. `spark.app.name`, `spark.executor.memory`).
    pub extra: HashMap<String, String>,
}

impl SparklessConfig {
    /// Build config from environment variables.
    ///
    /// - `ROBIN_SPARKLESS_WAREHOUSE_DIR` → `warehouse_dir` (ignored when unset or empty)
    /// - `ROBIN_SPARKLESS_TEMP_DIR` → `temp_dir` (ignored when unset or empty)
    /// - `ROBIN_SPARKLESS_CASE_SENSITIVE` → `case_sensitive`; true only when the value is
    ///   `"1"` or equals `"true"` ignoring ASCII case, false otherwise (including unset)
    /// - `ROBIN_SPARKLESS_CONFIG_*` → keys in `extra`; the key is the suffix after the prefix
    ///   with underscores converted to dots. A suffix without any ASCII lowercase letter is
    ///   also lowercased (e.g. `ROBIN_SPARKLESS_CONFIG_SPARK_APP_NAME` → `spark.app.name`);
    ///   a suffix that already contains lowercase letters is kept verbatim so camelCase keys
    ///   can be spelled out (e.g. `ROBIN_SPARKLESS_CONFIG_spark_sql_session_timeZone` →
    ///   `spark.sql.session.timeZone`). Entries with an empty key or empty value are skipped.
    ///
    /// Variables whose name or value is not valid Unicode are ignored; this function never
    /// panics because of unrelated entries in the process environment.
    pub fn from_env() -> Self {
        const CONFIG_PREFIX: &str = "ROBIN_SPARKLESS_CONFIG_";

        let case_sensitive = std::env::var("ROBIN_SPARKLESS_CASE_SENSITIVE")
            .map(|v| v.eq_ignore_ascii_case("true") || v == "1")
            .unwrap_or(false);

        // `vars_os` rather than `vars`: the `vars` iterator panics as soon as it meets any
        // variable (even one unrelated to this crate) that is not valid Unicode.
        let extra = std::env::vars_os()
            .filter_map(|(name, value)| {
                let suffix = name.to_str()?.strip_prefix(CONFIG_PREFIX)?;
                let key = Self::extra_key_from_suffix(suffix);
                let value = value.into_string().ok()?;
                if key.is_empty() || value.is_empty() {
                    None
                } else {
                    Some((key, value))
                }
            })
            .collect();

        SparklessConfig {
            warehouse_dir: Self::non_empty_path_from_env("ROBIN_SPARKLESS_WAREHOUSE_DIR"),
            temp_dir: Self::non_empty_path_from_env("ROBIN_SPARKLESS_TEMP_DIR"),
            case_sensitive,
            extra,
        }
    }

    /// Convert to the session config map (spark.sql.warehouse.dir, spark.sql.caseSensitive, plus extra).
    ///
    /// Starts from a copy of `extra`; the dedicated fields are inserted afterwards, so they
    /// override entries in `extra` that use the same key. `spark.sql.caseSensitive` is always
    /// present (`"true"` or `"false"`). `spark.sql.warehouse.dir` is only written when
    /// `warehouse_dir` is set and is valid UTF-8; `temp_dir` is not exported.
    pub fn to_session_config(&self) -> HashMap<String, String> {
        let mut m = self.extra.clone();
        // A path that cannot be represented as UTF-8 is left out rather than exported lossily.
        if let Some(dir) = self.warehouse_dir.as_deref().and_then(|d| d.to_str()) {
            m.insert("spark.sql.warehouse.dir".to_string(), dir.to_string());
        }
        m.insert(
            "spark.sql.caseSensitive".to_string(),
            self.case_sensitive.to_string(),
        );
        m
    }

    /// Read `var` as a path; unset, non-Unicode, or empty values yield `None`.
    fn non_empty_path_from_env(var: &str) -> Option<PathBuf> {
        std::env::var(var)
            .ok()
            .filter(|s| !s.is_empty())
            .map(PathBuf::from)
    }

    /// Turn the part of a `ROBIN_SPARKLESS_CONFIG_*` name after the prefix into a config key.
    fn extra_key_from_suffix(suffix: &str) -> String {
        let dotted = suffix.replace('_', ".");
        if dotted.chars().any(|c| c.is_ascii_lowercase()) {
            // The user spelled out the case explicitly; keep it as written.
            dotted
        } else {
            // Conventional SHOUTING_CASE variable name: Spark keys are lowercase dotted.
            dotted.to_ascii_lowercase()
        }
    }
}