mountpoint_s3_fs/fs/
config.rs

1use std::time::Duration;
2
3use nix::unistd::{getgid, getuid};
4
5use crate::mem_limiter::MINIMUM_MEM_LIMIT;
6use crate::metablock::WriteMode;
7use crate::prefetch::PrefetcherConfig;
8use crate::s3::S3Personality;
9
10use super::{ServerSideEncryption, TimeToLive};
11
/// Settings that control how the S3 file system behaves.
///
/// The `Default` implementation supplies the baseline value noted on each field.
#[derive(Debug)]
pub struct S3FilesystemConfig {
    /// Kernel cache config (metadata TTLs and negative-lookup caching, see `CacheConfig`).
    pub cache_config: CacheConfig,
    /// Readdir page size. Defaults to 100.
    pub readdir_size: usize,
    /// User id. Defaults to the value returned by `getuid()` for the calling process.
    pub uid: u32,
    /// Group id. Defaults to the value returned by `getgid()` for the calling process.
    pub gid: u32,
    /// Directory permissions. Defaults to `0o755`.
    pub dir_mode: u16,
    /// File permissions. Defaults to `0o644`.
    pub file_mode: u16,
    /// Allow delete. Defaults to `false`.
    pub allow_delete: bool,
    /// Allow overwrite. Defaults to `false`. Copied into the `WriteMode` built by `write_mode()`.
    pub allow_overwrite: bool,
    /// Allow renames. Defaults to `true`.
    pub allow_rename: bool,
    /// Enable incremental uploads. Defaults to `false`. Copied into the `WriteMode` built by
    /// `write_mode()`.
    pub incremental_upload: bool,
    /// Storage class to be used for new object uploads. Defaults to `None`.
    pub storage_class: Option<String>,
    /// S3 personality (for different S3 semantics). Defaults to `S3Personality::default()`.
    pub s3_personality: S3Personality,
    /// Server side encryption configuration to be used when creating new S3 object
    pub server_side_encryption: ServerSideEncryption,
    /// Use additional checksums for uploads. Defaults to `true`.
    pub use_upload_checksums: bool,
    /// Memory limit. Defaults to `MINIMUM_MEM_LIMIT`.
    // NOTE(review): the unit is not visible in this file (presumably bytes) — confirm against
    // the `mem_limiter` module.
    pub mem_limit: u64,
    /// Prefetcher configuration
    pub prefetcher_config: PrefetcherConfig,
    /// Limits the number of concurrent FUSE requests that the kernel may send. Default is 64.
    /// This option may also be configured by `UNSTABLE_MOUNTPOINT_MAX_BACKGROUND` environment variable,
    /// but the value specified in the config takes priority.
    ///
    /// The field itself defaults to `None`; read it through the `max_background_fuse_requests()`
    /// accessor to get the environment-variable fallback applied.
    pub max_background_fuse_requests: Option<u16>,
}
51
52impl Default for S3FilesystemConfig {
53    fn default() -> Self {
54        let uid = getuid().into();
55        let gid = getgid().into();
56
57        Self {
58            cache_config: Default::default(),
59            readdir_size: 100,
60            uid,
61            gid,
62            dir_mode: 0o755,
63            file_mode: 0o644,
64            allow_delete: false,
65            allow_overwrite: false,
66            incremental_upload: false,
67            allow_rename: true,
68            storage_class: None,
69            s3_personality: S3Personality::default(),
70            server_side_encryption: Default::default(),
71            use_upload_checksums: true,
72            mem_limit: MINIMUM_MEM_LIMIT,
73            prefetcher_config: Default::default(),
74            max_background_fuse_requests: None,
75        }
76    }
77}
78
79impl S3FilesystemConfig {
80    pub fn write_mode(&self) -> WriteMode {
81        WriteMode {
82            allow_overwrite: self.allow_overwrite,
83            incremental_upload: self.incremental_upload,
84        }
85    }
86
87    pub fn max_background_fuse_requests(&self) -> Option<u16> {
88        // NOTE: Support for this environment variable may be removed in future without notice.
89        const ENV_VAR_KEY_MAX_BACKGROUND: &str = "UNSTABLE_MOUNTPOINT_MAX_BACKGROUND";
90        if self.max_background_fuse_requests.is_some() {
91            self.max_background_fuse_requests
92        } else if let Some(user_max_background) = std::env::var_os(ENV_VAR_KEY_MAX_BACKGROUND) {
93            let max_background = Self::parse_env_var_to_u16(ENV_VAR_KEY_MAX_BACKGROUND, user_max_background);
94            Some(max_background)
95        } else {
96            None
97        }
98    }
99
100    pub fn fuse_congestion_threshold(&self) -> Option<u16> {
101        // NOTE: Support for this environment variable may be removed in future without notice.
102        const ENV_VAR_KEY_CONGESTION_THRESHOLD: &str = "UNSTABLE_MOUNTPOINT_CONGESTION_THRESHOLD";
103        std::env::var_os(ENV_VAR_KEY_CONGESTION_THRESHOLD).map(|user_congestion_threshold| {
104            Self::parse_env_var_to_u16(ENV_VAR_KEY_CONGESTION_THRESHOLD, user_congestion_threshold)
105        })
106    }
107
108    /// Helper to return the u16 value in an environment variable, or panic.  Useful for unstable overrides.
109    fn parse_env_var_to_u16(var_name: &str, var_value: std::ffi::OsString) -> u16 {
110        var_value
111            .to_string_lossy()
112            .parse::<u16>()
113            .unwrap_or_else(|_| panic!("Invalid value for environment variable {var_name}. Must be positive integer."))
114    }
115}
116
/// Metadata caching settings: whether lookups may be answered from cache, how long file and
/// directory metadata stay cached, and how negative lookups are cached.
///
/// The `Default` implementation supplies the baseline value noted on each field.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Should the file system serve lookup requests including open from cached entries,
    /// or instead check S3 even when a valid cached entry may be available?
    ///
    /// Even when disabled, some operations such as `getattr` are allowed to be served from cache
    /// with a short TTL since Linux filesystems behave badly when the TTL is zero.
    /// For example, results from `readdir` would expire immediately, and the kernel would
    /// immediately `getattr` every entry returned from `readdir`.
    ///
    /// Defaults to `false`.
    pub serve_lookup_from_cache: bool,
    /// How long the kernel will cache metadata for files. Defaults to 100 milliseconds.
    pub file_ttl: Duration,
    /// How long the kernel will cache metadata for directories. Defaults to 1000 milliseconds.
    pub dir_ttl: Duration,
    /// Should the file system cache negative lookups? Defaults to `false`.
    pub use_negative_cache: bool,
    /// How long the file system will cache negative entries. Defaults to the default `file_ttl`.
    pub negative_cache_ttl: Duration,
    /// Maximum number of negative entries to cache. Defaults to 100,000.
    pub negative_cache_size: usize,
}
138
139impl Default for CacheConfig {
140    fn default() -> Self {
141        // We want to do as little caching as possible by default,
142        // but Linux filesystems behave badly when the TTL is exactly zero.
143        // For example, results from `readdir` will expire immediately, and so
144        // the kernel will immediately re-lookup every entry returned from `readdir`. So we apply
145        // small non-zero TTLs. The goal is to be small enough that the impact on consistency is
146        // minimal, but large enough that a single cache miss doesn't cause a cascading effect where
147        // every other cache entry expires by the time that cache miss is serviced. We also apply a
148        // longer TTL for directories, which are both less likely to change on the S3 side and
149        // checked more often (for directory permissions checks).
150        let file_ttl = Duration::from_millis(100);
151        let dir_ttl = Duration::from_millis(1000);
152
153        // We want the negative cache to be effective but need to limit its memory usage. This value
154        // results in a maximum memory usage of ~20MB (assuming average file name length of 37 bytes)
155        // and should be large enough for many workloads. The metrics in
156        // `metadata_cache.negative_cache`, in particular `entries_evicted_before_expiry`, can be
157        // monitored to verify if this limit needs reviewing.
158        let negative_cache_size = 100_000;
159
160        Self {
161            serve_lookup_from_cache: false,
162            file_ttl,
163            dir_ttl,
164            use_negative_cache: false,
165            negative_cache_ttl: file_ttl,
166            negative_cache_size,
167        }
168    }
169}
170
171impl CacheConfig {
172    /// Construct cache configuration settings from metadata TTL.
173    pub fn new(metadata_ttl: TimeToLive) -> Self {
174        match metadata_ttl {
175            TimeToLive::Minimal => Default::default(),
176            TimeToLive::Indefinite => Self {
177                serve_lookup_from_cache: true,
178                file_ttl: TimeToLive::INDEFINITE_DURATION,
179                dir_ttl: TimeToLive::INDEFINITE_DURATION,
180                use_negative_cache: true,
181                negative_cache_ttl: TimeToLive::INDEFINITE_DURATION,
182                ..Default::default()
183            },
184            TimeToLive::Duration(ttl) => Self {
185                serve_lookup_from_cache: true,
186                file_ttl: ttl,
187                dir_ttl: ttl,
188                use_negative_cache: true,
189                negative_cache_ttl: ttl,
190                ..Default::default()
191            },
192        }
193    }
194
195    pub fn with_negative_metadata_ttl(self, negative_metadata_ttl: TimeToLive) -> Self {
196        match negative_metadata_ttl {
197            TimeToLive::Minimal => Self {
198                use_negative_cache: false,
199                negative_cache_ttl: Self::default().negative_cache_ttl,
200                ..self
201            },
202            TimeToLive::Indefinite => Self {
203                use_negative_cache: true,
204                negative_cache_ttl: TimeToLive::INDEFINITE_DURATION,
205                ..self
206            },
207            TimeToLive::Duration(ttl) => Self {
208                use_negative_cache: true,
209                negative_cache_ttl: ttl,
210                ..self
211            },
212        }
213    }
214}