Skip to main content

polars_io/cloud/
options.rs

1#[cfg(feature = "aws")]
2use std::io::Read;
3#[cfg(feature = "aws")]
4use std::path::Path;
5use std::str::FromStr;
6use std::sync::LazyLock;
7
8#[cfg(any(feature = "aws", feature = "gcp", feature = "azure", feature = "http"))]
9use object_store::ClientOptions;
10#[cfg(feature = "aws")]
11use object_store::aws::AmazonS3Builder;
12#[cfg(feature = "aws")]
13pub use object_store::aws::AmazonS3ConfigKey;
14#[cfg(feature = "azure")]
15pub use object_store::azure::AzureConfigKey;
16#[cfg(feature = "azure")]
17use object_store::azure::MicrosoftAzureBuilder;
18#[cfg(feature = "gcp")]
19use object_store::gcp::GoogleCloudStorageBuilder;
20#[cfg(feature = "gcp")]
21pub use object_store::gcp::GoogleConfigKey;
22use polars_error::*;
23#[cfg(feature = "aws")]
24use polars_utils::cache::LruCache;
25use polars_utils::pl_path::{CloudScheme, PlRefPath};
26use polars_utils::total_ord::TotalOrdWrap;
27#[cfg(feature = "http")]
28use reqwest::header::HeaderMap;
29#[cfg(feature = "serde")]
30use serde::{Deserialize, Serialize};
31
32#[cfg(feature = "cloud")]
33use super::credential_provider::PlCredentialProvider;
34#[cfg(feature = "file_cache")]
35use crate::file_cache::get_env_file_cache_ttl;
36#[cfg(feature = "aws")]
37use crate::pl_async::with_concurrency_budget;
38
/// Process-wide LRU cache (capacity 32) mapping an S3 bucket name to the region
/// that was resolved for it.
///
/// `CloudOptions::build_aws` consults this cache before issuing a request to
/// discover a bucket's region, and stores the discovered region here afterwards.
#[cfg(feature = "aws")]
static BUCKET_REGION: LazyLock<
    std::sync::Mutex<LruCache<polars_utils::pl_str::PlSmallStr, polars_utils::pl_str::PlSmallStr>>,
> = LazyLock::new(|| std::sync::Mutex::new(LruCache::with_capacity(32)));
43
/// The type of the config keys must satisfy the following requirements:
/// 1. must be easily collected into a HashMap, the type required by the object_store API.
/// 2. be Serializable, required when the serde-lazy feature is defined.
/// 3. not actually use HashMap since that type is disallowed in Polars for performance reasons.
///
/// Currently this type is a vector of (config key, config value) pairs.
// NOTE(review): `dead_code` is presumably allowed because the alias can end up unused when no
// cloud provider feature is enabled — confirm before removing the attribute.
#[allow(dead_code)]
type Configs<T> = Vec<(T, String)>;
52
/// Provider-specific connection configuration stored inside [`CloudOptions`].
///
/// Each variant only exists when the matching cargo feature is enabled. The
/// `build_*` methods on [`CloudOptions`] panic when they find a variant that
/// belongs to a different provider than the one being built.
#[derive(Clone, Debug, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub(crate) enum CloudConfig {
    /// Typed key/value pairs applied to the `AmazonS3Builder`.
    #[cfg(feature = "aws")]
    Aws(
        // The JSON schema describes the entries as plain string pairs.
        #[cfg_attr(feature = "dsl-schema", schemars(with = "Vec<(String, String)>"))]
        Configs<AmazonS3ConfigKey>,
    ),
    /// Typed key/value pairs applied to the `MicrosoftAzureBuilder`.
    #[cfg(feature = "azure")]
    Azure(
        // The JSON schema describes the entries as plain string pairs.
        #[cfg_attr(feature = "dsl-schema", schemars(with = "Vec<(String, String)>"))]
        Configs<AzureConfigKey>,
    ),
    /// Typed key/value pairs applied to the `GoogleCloudStorageBuilder`.
    #[cfg(feature = "gcp")]
    Gcp(
        // The JSON schema describes the entries as plain string pairs.
        #[cfg_attr(feature = "dsl-schema", schemars(with = "Vec<(String, String)>"))]
        Configs<GoogleConfigKey>,
    ),
    /// (header name, header value) pairs installed as default headers on the HTTP client.
    #[cfg(feature = "http")]
    Http { headers: Vec<(String, String)> },
}
75
#[derive(Clone, Debug, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
/// Options to connect to various cloud providers.
pub struct CloudOptions {
    /// Time-to-live used by the file cache. The default value comes from
    /// `get_env_file_cache_ttl()`.
    // NOTE(review): the unit is not visible in this file (presumably seconds) — confirm.
    #[cfg(feature = "file_cache")]
    pub file_cache_ttl: u64,
    /// Provider-specific configuration; `None` means no explicit configuration was given.
    pub(crate) config: Option<CloudConfig>,
    /// Retry behaviour for object-store requests. Falls back to `Default` when the
    /// field is missing from serialized data.
    #[cfg_attr(feature = "serde", serde(default))]
    pub retry_config: CloudRetryConfig,
    #[cfg(feature = "cloud")]
    /// Note: In most cases you will want to access this via [`CloudOptions::initialized_credential_provider`]
    /// rather than directly.
    pub(crate) credential_provider: Option<PlCredentialProvider>,
}
91
92impl Default for CloudOptions {
93    fn default() -> Self {
94        Self::default_static_ref().clone()
95    }
96}
97
98impl CloudOptions {
99    pub fn default_static_ref() -> &'static Self {
100        static DEFAULT: LazyLock<CloudOptions> = LazyLock::new(|| CloudOptions {
101            #[cfg(feature = "file_cache")]
102            file_cache_ttl: get_env_file_cache_ttl(),
103            config: None,
104            retry_config: CloudRetryConfig::default(),
105            #[cfg(feature = "cloud")]
106            credential_provider: None,
107        });
108
109        &DEFAULT
110    }
111}
112
/// User-facing retry settings for cloud requests.
///
/// Every field is optional. When this is converted into an
/// `object_store::RetryConfig`, a field left as `None` is replaced by a default
/// that can be overridden through an environment variable (named per field below).
#[derive(Clone, Copy, Default, Debug, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct CloudRetryConfig {
    /// Maximum number of retries (`POLARS_CLOUD_MAX_RETRIES`, default 2).
    pub max_retries: Option<usize>,
    /// Retry timeout (`POLARS_CLOUD_RETRY_TIMEOUT_MS`, default 10 s).
    pub retry_timeout: Option<std::time::Duration>,
    /// Initial backoff (`POLARS_CLOUD_RETRY_INIT_BACKOFF_MS`, default 100 ms).
    pub retry_init_backoff: Option<std::time::Duration>,
    /// Maximum backoff (`POLARS_CLOUD_RETRY_MAX_BACKOFF_MS`, default 15 s).
    pub retry_max_backoff: Option<std::time::Duration>,
    /// Backoff base multiplier (`POLARS_CLOUD_RETRY_BASE_MULTIPLIER`, default 2.0).
    /// Wrapped in `TotalOrdWrap` so the struct can derive `Eq` and `Hash`.
    pub retry_base_multiplier: Option<TotalOrdWrap<f64>>,
}
123
#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))]
impl From<CloudRetryConfig> for object_store::RetryConfig {
    /// Converts the optional user settings into a concrete object-store retry config.
    ///
    /// Fields that are `None` are taken from a lazily initialized set of defaults,
    /// which are themselves read from the `POLARS_CLOUD_*` environment variables
    /// the first time any default is needed. The resulting config is printed to
    /// stderr when verbose mode is on.
    ///
    /// # Panics
    ///
    /// Panics if one of the environment variables is set to a value that cannot be
    /// parsed (only when a default is actually required).
    fn from(value: CloudRetryConfig) -> Self {
        use std::time::Duration;

        use polars_core::config::verbose;

        /// Returns `default` when `name` cannot be read from the environment, the
        /// parsed value otherwise; panics when the value is present but unparsable.
        fn parse_env_var<T: FromStr>(default: T, name: &'static str) -> T {
            match std::env::var(name) {
                Err(_) => default,
                Ok(x) => match x.parse::<T>() {
                    Ok(parsed) => parsed,
                    Err(_) => panic!("invalid value for {name}: {x}"),
                },
            }
        }

        // Environment-derived fallbacks, evaluated at most once per process.
        static DEFAULTS: LazyLock<object_store::RetryConfig> = LazyLock::new(|| {
            let backoff = object_store::BackoffConfig {
                init_backoff: Duration::from_millis(parse_env_var(
                    100,
                    "POLARS_CLOUD_RETRY_INIT_BACKOFF_MS",
                )),
                max_backoff: Duration::from_millis(parse_env_var(
                    15 * 1000,
                    "POLARS_CLOUD_RETRY_MAX_BACKOFF_MS",
                )),
                base: parse_env_var(2., "POLARS_CLOUD_RETRY_BASE_MULTIPLIER"),
            };
            let max_retries = parse_env_var(2, "POLARS_CLOUD_MAX_RETRIES");
            let retry_timeout =
                Duration::from_millis(parse_env_var(10 * 1000, "POLARS_CLOUD_RETRY_TIMEOUT_MS"));

            object_store::RetryConfig {
                backoff,
                max_retries,
                retry_timeout,
            }
        });

        // `DEFAULTS` is only touched in the `None` arms so that it stays
        // uninitialized when every field was given explicitly.
        let init_backoff = match value.retry_init_backoff {
            Some(explicit) => explicit,
            None => DEFAULTS.backoff.init_backoff,
        };
        let max_backoff = match value.retry_max_backoff {
            Some(explicit) => explicit,
            None => DEFAULTS.backoff.max_backoff,
        };
        let base = match value.retry_base_multiplier {
            Some(explicit) => explicit.0,
            None => DEFAULTS.backoff.base,
        };
        let max_retries = match value.max_retries {
            Some(explicit) => explicit,
            None => DEFAULTS.max_retries,
        };
        let retry_timeout = match value.retry_timeout {
            Some(explicit) => explicit,
            None => DEFAULTS.retry_timeout,
        };

        let out = object_store::RetryConfig {
            backoff: object_store::BackoffConfig {
                init_backoff,
                max_backoff,
                base,
            },
            max_retries,
            retry_timeout,
        };

        if verbose() {
            eprintln!("object-store retry config: {:?}", &out)
        }

        out
    }
}
184
/// Builds a `HeaderMap` from a slice of (name, value) string pairs.
///
/// Returns a compute error for the first pair whose name or value is not a valid
/// HTTP header name / value. Entries are added with `insert`, so a later pair
/// with the same name replaces an earlier one.
#[cfg(feature = "http")]
pub(crate) fn try_build_http_header_map_from_items_slice<S: AsRef<str>>(
    headers: &[(S, S)],
) -> PolarsResult<HeaderMap> {
    use reqwest::header::{HeaderName, HeaderValue};

    headers.iter().try_fold(
        HeaderMap::with_capacity(headers.len()),
        |mut acc, (name, value)| -> PolarsResult<HeaderMap> {
            let name = HeaderName::from_str(name.as_ref()).map_err(to_compute_err)?;
            let value = HeaderValue::from_str(value.as_ref()).map_err(to_compute_err)?;
            acc.insert(name, value);
            Ok(acc)
        },
    )
}
202
203#[allow(dead_code)]
204/// Parse an untype configuration hashmap to a typed configuration for the given configuration key type.
205fn parse_untyped_config<T, I: IntoIterator<Item = (impl AsRef<str>, impl Into<String>)>>(
206    config: I,
207) -> PolarsResult<Configs<T>>
208where
209    T: FromStr + Eq + std::hash::Hash,
210{
211    Ok(config
212        .into_iter()
213        // Silently ignores custom upstream storage_options
214        .filter_map(|(key, val)| {
215            T::from_str(key.as_ref().to_ascii_lowercase().as_str())
216                .ok()
217                .map(|typed_key| (typed_key, val.into()))
218        })
219        .collect::<Configs<T>>())
220}
221
/// The storage backend family a URI scheme belongs to
/// (see [`CloudType::from_cloud_scheme`]).
#[derive(Debug, Clone, PartialEq)]
pub enum CloudType {
    /// Amazon S3 (`s3`, `s3a` schemes)
    Aws,
    /// Microsoft Azure (`abfs`, `abfss`, `adl`, `az`, `azure` schemes)
    Azure,
    /// URI with 'file:' scheme
    File,
    /// Google cloud platform
    Gcp,
    /// Plain HTTP(S)
    Http,
    /// HuggingFace
    Hf,
}
234
235impl CloudType {
236    pub fn from_cloud_scheme(scheme: CloudScheme) -> Self {
237        match scheme {
238            CloudScheme::Abfs
239            | CloudScheme::Abfss
240            | CloudScheme::Adl
241            | CloudScheme::Az
242            | CloudScheme::Azure => Self::Azure,
243
244            CloudScheme::File | CloudScheme::FileNoHostname => Self::File,
245
246            CloudScheme::Gcs | CloudScheme::Gs => Self::Gcp,
247
248            CloudScheme::Hf => Self::Hf,
249
250            CloudScheme::Http | CloudScheme::Https => Self::Http,
251
252            CloudScheme::S3 | CloudScheme::S3a => Self::Aws,
253        }
254    }
255}
256
/// User-agent string sent with cloud/HTTP requests: `polars/<crate version>`,
/// assembled at compile time from `CARGO_PKG_VERSION`.
pub static USER_AGENT: &str = concat!("polars", "/", env!("CARGO_PKG_VERSION"),);
258
/// Builds the `ClientOptions` shared by all object-store clients.
///
/// * the response-body timeout is disabled;
/// * the connect timeout is read from `POLARS_HTTP_CONNECT_TIMEOUT_SECONDS`
///   (a non-zero integer number of seconds), defaulting to 5 minutes;
/// * the user agent is [`USER_AGENT`];
/// * plain `http` URLs are allowed.
///
/// # Panics
///
/// Panics if `POLARS_HTTP_CONNECT_TIMEOUT_SECONDS` is set but is not a non-zero
/// unsigned integer.
#[cfg(any(feature = "aws", feature = "gcp", feature = "azure", feature = "http"))]
pub(super) fn get_client_options() -> ClientOptions {
    use std::num::NonZeroU64;
    use std::time::Duration;

    use reqwest::header::HeaderValue;

    let connect_timeout_secs: u64 = match std::env::var("POLARS_HTTP_CONNECT_TIMEOUT_SECONDS") {
        Ok(x) => match x.parse::<NonZeroU64>() {
            Ok(secs) => secs.get(),
            Err(_) => panic!("invalid value for POLARS_HTTP_CONNECT_TIMEOUT_SECONDS: {x}"),
        },
        Err(_) => 5 * 60,
    };

    let options = ClientOptions::new()
        // Disables the time limit for downloading the response body.
        .with_timeout_disabled();
    // Set the time limit for establishing the connection.
    let options = options.with_connect_timeout(Duration::from_secs(connect_timeout_secs));

    options
        .with_user_agent(HeaderValue::from_static(USER_AGENT))
        .with_allow_http(true)
}
284
/// Fills in missing `builder` settings from local configuration files.
///
/// `items` is a list of `(file path, [(regex, config key)])`. For every file, each
/// key that is not yet set on `builder` is looked up by running its regex over the
/// file content; capture group 1 of the first match (with trailing whitespace
/// removed) becomes the value. Keys that are already set are never overwritten,
/// and a file is not even opened when all of its keys are already set.
///
/// The lookup is best-effort: an unreadable file or a pattern without a match no
/// longer aborts the remaining keys and files. Those are still processed, and the
/// failure is only reflected in the return value.
///
/// Note that the patterns are applied to the whole file, so the first match wins
/// regardless of which profile section it appears in.
///
/// # Returns
///
/// `Some(())` if every file that had to be consulted was readable and every
/// missing key was found, `None` otherwise.
///
/// # Panics
///
/// Panics if one of the patterns is not a valid regex.
#[cfg(feature = "aws")]
fn read_config(
    builder: &mut AmazonS3Builder,
    items: &[(&Path, &[(&str, AmazonS3ConfigKey)])],
) -> Option<()> {
    use crate::path_utils::resolve_homedir;

    let mut all_resolved = true;

    for (path, keys) in items {
        if keys
            .iter()
            .all(|(_, key)| builder.get_config_value(key).is_some())
        {
            continue;
        }

        // Fails (and skips this file only) when the file is missing, unreadable or
        // not valid UTF-8.
        let mut content = String::new();
        let readable = std::fs::File::open(resolve_homedir(path))
            .and_then(|mut file| file.read_to_string(&mut content))
            .is_ok();
        if !readable {
            all_resolved = false;
            continue;
        }

        for (pattern, key) in keys.iter() {
            if builder.get_config_value(key).is_some() {
                continue;
            }

            let reg = polars_utils::regex_cache::compile_regex(pattern).unwrap();
            let Some(m) = reg.captures(content.as_str()).and_then(|cap| cap.get(1)) else {
                // A key missing from the file must not prevent later keys from
                // being picked up.
                all_resolved = false;
                continue;
            };

            // The capture runs to the end of the line, so drop trailing blanks.
            let parsed = m.as_str().trim_end();
            *builder = std::mem::take(builder).with_config(*key, parsed);
        }
    }

    all_resolved.then_some(())
}
317
318impl CloudOptions {
319    pub fn with_retry_config(mut self, retry_config: CloudRetryConfig) -> Self {
320        self.retry_config = retry_config;
321        self
322    }
323
324    #[cfg(feature = "cloud")]
325    pub fn with_credential_provider(
326        mut self,
327        credential_provider: Option<PlCredentialProvider>,
328    ) -> Self {
329        self.credential_provider = credential_provider;
330        self
331    }
332
333    /// Set the configuration for AWS connections. This is the preferred API from rust.
334    #[cfg(feature = "aws")]
335    pub fn with_aws<I: IntoIterator<Item = (AmazonS3ConfigKey, impl Into<String>)>>(
336        mut self,
337        configs: I,
338    ) -> Self {
339        self.config = Some(CloudConfig::Aws(
340            configs.into_iter().map(|(k, v)| (k, v.into())).collect(),
341        ));
342        self
343    }
344
    /// Build the [`object_store::ObjectStore`] implementation for AWS.
    ///
    /// Settings are applied to the builder in this order:
    /// 1. environment (`AmazonS3Builder::from_env`), shared client options and `url`;
    /// 2. the storage update options reported by the credential provider, if any;
    /// 3. for keys still unset: the region from `~/.aws/config` and the access key,
    ///    secret key and session token from `~/.aws/credentials`;
    /// 4. the explicit configuration stored in `self.config`;
    /// 5. if neither a region nor a default region is set: a region from the
    ///    bucket-region cache, `us-east-1` when a custom endpoint is configured, or
    ///    the `x-amz-bucket-region` header of a `HEAD` request to the bucket;
    /// 6. the retry configuration and finally the credential provider.
    ///
    /// # Errors
    ///
    /// Returns an error if the credential provider cannot be initialized or queried,
    /// if `url` cannot be parsed into a cloud location, if the region lookup request
    /// fails, or if the builder rejects the final configuration.
    ///
    /// # Panics
    ///
    /// Panics if `self.config` holds a configuration for a different provider, if the
    /// `BUCKET_REGION` mutex is poisoned, if the `reqwest` client for the region
    /// lookup cannot be built, or (with the `python` feature) if
    /// `_can_use_as_provider` returns something that is not a `bool`.
    #[cfg(feature = "aws")]
    pub async fn build_aws(
        &self,
        url: PlRefPath,
        clear_cached_credentials: bool,
    ) -> PolarsResult<impl object_store::ObjectStore> {
        use super::credential_provider::IntoCredentialProvider;

        let opt_credential_provider =
            self.initialized_credential_provider(clear_cached_credentials)?;

        let mut builder = AmazonS3Builder::from_env()
            .with_client_options(get_client_options())
            .with_url(url.to_string());

        // Options supplied by the credential provider; keys that are not valid
        // `AmazonS3ConfigKey`s are dropped by `parse_untyped_config`.
        if let Some(credential_provider) = &opt_credential_provider {
            let storage_update_options = parse_untyped_config::<AmazonS3ConfigKey, _>(
                credential_provider
                    .storage_update_options()?
                    .into_iter()
                    .map(|(k, v)| (k, v.to_string())),
            )?;

            for (key, value) in storage_update_options {
                builder = builder.with_config(key, value);
            }
        }

        // Best-effort lookups in the local AWS files; the `Option` results are
        // ignored, so a missing file or key is not an error.
        read_config(
            &mut builder,
            &[(
                Path::new("~/.aws/config"),
                &[("region\\s*=\\s*([^\r\n]*)", AmazonS3ConfigKey::Region)],
            )],
        );

        read_config(
            &mut builder,
            &[(
                Path::new("~/.aws/credentials"),
                &[
                    (
                        "aws_access_key_id\\s*=\\s*([^\\r\\n]*)",
                        AmazonS3ConfigKey::AccessKeyId,
                    ),
                    (
                        "aws_secret_access_key\\s*=\\s*([^\\r\\n]*)",
                        AmazonS3ConfigKey::SecretAccessKey,
                    ),
                    (
                        "aws_session_token\\s*=\\s*([^\\r\\n]*)",
                        AmazonS3ConfigKey::Token,
                    ),
                ],
            )],
        );

        // Explicit user configuration is applied after everything sourced above.
        if let Some(options) = &self.config {
            let CloudConfig::Aws(options) = options else {
                panic!("impl error: cloud type mismatch")
            };
            for (key, value) in options {
                builder = builder.with_config(*key, value);
            }
        }

        // Region resolution, only when nothing above provided one.
        if builder
            .get_config_value(&AmazonS3ConfigKey::DefaultRegion)
            .is_none()
            && builder
                .get_config_value(&AmazonS3ConfigKey::Region)
                .is_none()
        {
            let bucket = crate::cloud::CloudLocation::new(url, false)?.bucket;
            // The lock is confined to this block so the guard is released before
            // the `.await` further down.
            let region = {
                let mut bucket_region = BUCKET_REGION.lock().unwrap();
                bucket_region.get(bucket.as_str()).cloned()
            };

            match region {
                Some(region) => {
                    builder = builder.with_config(AmazonS3ConfigKey::Region, region.as_str())
                },
                None => {
                    if builder
                        .get_config_value(&AmazonS3ConfigKey::Endpoint)
                        .is_some()
                    {
                        // Set a default value if the endpoint is not aws.
                        // See: #13042
                        builder = builder.with_config(AmazonS3ConfigKey::Region, "us-east-1");
                    } else {
                        polars_warn!(
                            "'(default_)region' not set; polars will try to get it from bucket\n\nSet the region manually to silence this warning."
                        );
                        // Ask S3 for the bucket's region via a HEAD request.
                        let result = with_concurrency_budget(1, || async {
                            reqwest::Client::builder()
                                .user_agent(USER_AGENT)
                                .build()
                                .unwrap()
                                .head(format!("https://{bucket}.s3.amazonaws.com"))
                                .send()
                                .await
                                .map_err(to_compute_err)
                        })
                        .await?;
                        // When the header is absent the region is simply left unset.
                        if let Some(region) = result.headers().get("x-amz-bucket-region") {
                            let region =
                                std::str::from_utf8(region.as_bytes()).map_err(to_compute_err)?;
                            // Remember the answer for later calls on the same bucket.
                            let mut bucket_region = BUCKET_REGION.lock().unwrap();
                            bucket_region.insert(bucket, region.into());
                            builder = builder.with_config(AmazonS3ConfigKey::Region, region)
                        }
                    }
                },
            };
        };

        let builder = builder.with_retry(self.retry_config.into());

        // A Python provider may opt out of being used as an object-store credential
        // provider through an optional `_can_use_as_provider()` method; when the
        // attribute does not exist the provider is kept.
        let opt_credential_provider = match opt_credential_provider {
            #[cfg(feature = "python")]
            Some(PlCredentialProvider::Python(object)) => {
                if pyo3::Python::attach(|py| {
                    let Ok(func_object) = object
                        .unwrap_as_provider_ref()
                        .getattr(py, "_can_use_as_provider")
                    else {
                        return PolarsResult::Ok(true);
                    };

                    Ok(func_object.call0(py)?.extract::<bool>(py).unwrap())
                })? {
                    Some(PlCredentialProvider::Python(object))
                } else {
                    None
                }
            },

            v => v,
        };

        let builder = if let Some(credential_provider) = opt_credential_provider {
            builder.with_credentials(credential_provider.into_aws_provider())
        } else {
            builder
        };

        let out = builder.build()?;

        Ok(out)
    }
498
499    /// Set the configuration for Azure connections. This is the preferred API from rust.
500    #[cfg(feature = "azure")]
501    pub fn with_azure<I: IntoIterator<Item = (AzureConfigKey, impl Into<String>)>>(
502        mut self,
503        configs: I,
504    ) -> Self {
505        self.config = Some(CloudConfig::Azure(
506            configs.into_iter().map(|(k, v)| (k, v.into())).collect(),
507        ));
508        self
509    }
510
511    /// Build the [`object_store::ObjectStore`] implementation for Azure.
512    #[cfg(feature = "azure")]
513    pub fn build_azure(
514        &self,
515        url: PlRefPath,
516        clear_cached_credentials: bool,
517    ) -> PolarsResult<impl object_store::ObjectStore> {
518        use super::credential_provider::IntoCredentialProvider;
519
520        let verbose = polars_core::config::verbose();
521
522        // The credential provider `self.credentials` is prioritized if it is set. We also need
523        // `from_env()` as it may source environment configured storage account name.
524        let mut builder =
525            MicrosoftAzureBuilder::from_env().with_client_options(get_client_options());
526
527        if let Some(options) = &self.config {
528            let CloudConfig::Azure(options) = options else {
529                panic!("impl error: cloud type mismatch")
530            };
531            for (key, value) in options.iter() {
532                builder = builder.with_config(*key, value);
533            }
534        }
535
536        let builder = builder
537            .with_url(url.to_string())
538            .with_retry(self.retry_config.into());
539
540        let builder =
541            if let Some(v) = self.initialized_credential_provider(clear_cached_credentials)? {
542                if verbose {
543                    eprintln!(
544                        "[CloudOptions::build_azure]: Using credential provider {:?}",
545                        &v
546                    );
547                }
548                builder.with_credentials(v.into_azure_provider())
549            } else {
550                builder
551            };
552
553        let out = builder.build()?;
554
555        Ok(out)
556    }
557
558    /// Set the configuration for GCP connections. This is the preferred API from rust.
559    #[cfg(feature = "gcp")]
560    pub fn with_gcp<I: IntoIterator<Item = (GoogleConfigKey, impl Into<String>)>>(
561        mut self,
562        configs: I,
563    ) -> Self {
564        self.config = Some(CloudConfig::Gcp(
565            configs.into_iter().map(|(k, v)| (k, v.into())).collect(),
566        ));
567        self
568    }
569
570    /// Build the [`object_store::ObjectStore`] implementation for GCP.
571    #[cfg(feature = "gcp")]
572    pub fn build_gcp(
573        &self,
574        url: PlRefPath,
575        clear_cached_credentials: bool,
576    ) -> PolarsResult<impl object_store::ObjectStore> {
577        use super::credential_provider::IntoCredentialProvider;
578
579        let credential_provider = self.initialized_credential_provider(clear_cached_credentials)?;
580
581        let builder = if credential_provider.is_none() {
582            GoogleCloudStorageBuilder::from_env()
583        } else {
584            GoogleCloudStorageBuilder::new()
585        };
586
587        let mut builder = builder.with_client_options(get_client_options());
588
589        if let Some(options) = &self.config {
590            let CloudConfig::Gcp(options) = options else {
591                panic!("impl error: cloud type mismatch")
592            };
593            for (key, value) in options.iter() {
594                builder = builder.with_config(*key, value);
595            }
596        }
597
598        let builder = builder
599            .with_url(url.to_string())
600            .with_retry(self.retry_config.into());
601
602        let builder = if let Some(v) = credential_provider {
603            builder.with_credentials(v.into_gcp_provider())
604        } else {
605            builder
606        };
607
608        let out = builder.build()?;
609
610        Ok(out)
611    }
612
613    #[cfg(feature = "http")]
614    pub fn build_http(&self, url: &str) -> PolarsResult<impl object_store::ObjectStore> {
615        let out = object_store::http::HttpBuilder::new()
616            .with_url(url.to_string())
617            .with_client_options({
618                let mut opts = super::get_client_options();
619                if let Some(CloudConfig::Http { headers }) = &self.config {
620                    opts = opts.with_default_headers(try_build_http_header_map_from_items_slice(
621                        headers.as_slice(),
622                    )?);
623                }
624                opts
625            })
626            .build()?;
627
628        Ok(out)
629    }
630
631    /// Parse a configuration from a Hashmap. This is the interface from Python.
632    #[allow(unused_variables)]
633    pub fn from_untyped_config<I: IntoIterator<Item = (impl AsRef<str>, impl Into<String>)>>(
634        scheme: Option<CloudScheme>,
635        config: I,
636    ) -> PolarsResult<Self> {
637        match scheme.map_or(CloudType::File, CloudType::from_cloud_scheme) {
638            CloudType::Aws => {
639                #[cfg(feature = "aws")]
640                {
641                    parse_untyped_config::<AmazonS3ConfigKey, _>(config)
642                        .map(|aws| Self::default().with_aws(aws))
643                }
644                #[cfg(not(feature = "aws"))]
645                {
646                    polars_bail!(ComputeError: "'aws' feature is not enabled");
647                }
648            },
649            CloudType::Azure => {
650                #[cfg(feature = "azure")]
651                {
652                    parse_untyped_config::<AzureConfigKey, _>(config)
653                        .map(|azure| Self::default().with_azure(azure))
654                }
655                #[cfg(not(feature = "azure"))]
656                {
657                    polars_bail!(ComputeError: "'azure' feature is not enabled");
658                }
659            },
660            CloudType::File => Ok(Self::default()),
661            CloudType::Http => Ok(Self::default()),
662            CloudType::Gcp => {
663                #[cfg(feature = "gcp")]
664                {
665                    parse_untyped_config::<GoogleConfigKey, _>(config)
666                        .map(|gcp| Self::default().with_gcp(gcp))
667                }
668                #[cfg(not(feature = "gcp"))]
669                {
670                    polars_bail!(ComputeError: "'gcp' feature is not enabled");
671                }
672            },
673            CloudType::Hf => {
674                #[cfg(feature = "http")]
675                {
676                    use polars_core::config;
677
678                    use crate::path_utils::resolve_homedir;
679
680                    let mut this = Self::default();
681                    let mut token = None;
682                    let verbose = config::verbose();
683
684                    for (i, (k, v)) in config.into_iter().enumerate() {
685                        let (k, v) = (k.as_ref(), v.into());
686
687                        if i == 0 && k == "token" {
688                            if verbose {
689                                eprintln!("HF token sourced from storage_options");
690                            }
691                            token = Some(v);
692                        } else {
693                            polars_bail!(ComputeError: "unknown configuration key for HF: {}", k)
694                        }
695                    }
696
697                    token = token
698                        .or_else(|| {
699                            let v = std::env::var("HF_TOKEN").ok();
700                            if v.is_some() && verbose {
701                                eprintln!("HF token sourced from HF_TOKEN env var");
702                            }
703                            v
704                        })
705                        .or_else(|| {
706                            let hf_home = std::env::var("HF_HOME");
707                            let hf_home = hf_home.as_deref();
708                            let hf_home = hf_home.unwrap_or("~/.cache/huggingface");
709                            let hf_home = resolve_homedir(hf_home);
710                            let cached_token_path = hf_home.join("token");
711
712                            let v = std::string::String::from_utf8(
713                                std::fs::read(&cached_token_path).ok()?,
714                            )
715                            .ok()
716                            .filter(|x| !x.is_empty());
717
718                            if v.is_some() && verbose {
719                                eprintln!("HF token sourced from {:?}", cached_token_path);
720                            }
721
722                            v
723                        });
724
725                    if let Some(v) = token {
726                        this.config = Some(CloudConfig::Http {
727                            headers: vec![("Authorization".into(), format!("Bearer {v}"))],
728                        })
729                    }
730
731                    Ok(this)
732                }
733                #[cfg(not(feature = "http"))]
734                {
735                    polars_bail!(ComputeError: "'http' feature is not enabled");
736                }
737            },
738        }
739    }
740
741    /// Python passes a credential provider builder that needs to be called to get the actual credential
742    /// provider.
743    #[cfg(feature = "cloud")]
744    fn initialized_credential_provider(
745        &self,
746        clear_cached_credentials: bool,
747    ) -> PolarsResult<Option<PlCredentialProvider>> {
748        if let Some(v) = self.credential_provider.clone() {
749            v.try_into_initialized(clear_cached_credentials)
750        } else {
751            Ok(None)
752        }
753    }
754}
755
// Unit tests for the untyped -> typed configuration parsing.
#[cfg(feature = "cloud")]
#[cfg(test)]
mod tests {
    use hashbrown::HashMap;

    use super::parse_untyped_config;

    /// Checks that a known AWS key is parsed regardless of its ASCII case and that
    /// an unknown key (`aws_s3_allow_unsafe_rename`) is dropped instead of
    /// producing an error.
    #[cfg(feature = "aws")]
    #[test]
    fn test_parse_untyped_config() {
        use object_store::aws::AmazonS3ConfigKey;

        // Lower-case key.
        let aws_config = [
            ("aws_secret_access_key", "a_key"),
            ("aws_s3_allow_unsafe_rename", "true"),
        ]
        .into_iter()
        .collect::<HashMap<_, _>>();
        let aws_keys = parse_untyped_config::<AmazonS3ConfigKey, _>(aws_config)
            .expect("Parsing keys shouldn't have thrown an error");

        // Only the recognized key survives.
        assert_eq!(
            aws_keys.first().unwrap().0,
            AmazonS3ConfigKey::SecretAccessKey
        );
        assert_eq!(aws_keys.len(), 1);

        // Upper-case key: parsing lower-cases keys before matching them.
        let aws_config = [
            ("AWS_SECRET_ACCESS_KEY", "a_key"),
            ("aws_s3_allow_unsafe_rename", "true"),
        ]
        .into_iter()
        .collect::<HashMap<_, _>>();
        let aws_keys = parse_untyped_config::<AmazonS3ConfigKey, _>(aws_config)
            .expect("Parsing keys shouldn't have thrown an error");

        assert_eq!(
            aws_keys.first().unwrap().0,
            AmazonS3ConfigKey::SecretAccessKey
        );
        assert_eq!(aws_keys.len(), 1);
    }
}