1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, LazyLock, Mutex, OnceLock};
use tempfile::TempDir;
use crate::{cloud_store::CloudObject, CloudError, Uri, UriError};
/// Holder for the temporary directory shared by every temp-backed cache in
/// this process; populated on the first call to [`temp_dir`].
// NOTE(review): Rust never drops statics, so any cleanup `TempDir` performs
// on drop presumably does not run at process exit — confirm if that matters.
static TEMP_DIR: OnceLock<TempDir> = OnceLock::new();

/// Returns the path of the shared temporary directory, creating the
/// directory the first time it is requested.
///
/// # Panics
///
/// Panics with the message `"temp dir"` if the directory cannot be created.
fn temp_dir() -> &'static std::path::Path {
    let shared = TEMP_DIR.get_or_init(|| {
        let created = TempDir::new();
        created.expect("temp dir")
    });
    shared.path()
}
/// Process-wide cache handle, built on first access.
///
/// When the `FILEMANAGER_CACHE_DIR` environment variable can be read and a
/// persistent cache can be created in that directory, that cache is used.
/// In every other case (variable unreadable, or directory creation failed)
/// the handle starts out as `FileCache::default()`.
static GLOBAL_CACHE: LazyLock<Mutex<FileCache>> = LazyLock::new(|| {
    let configured = match std::env::var("FILEMANAGER_CACHE_DIR") {
        Ok(dir) => FileCache::persistent(dir).ok(),
        Err(_) => None,
    };
    Mutex::new(configured.unwrap_or_default())
});
/// Registry of per-path mutexes, keyed by the path being guarded.
///
/// Entries are created on demand by [`in_flight_lock`]; nothing in this
/// block ever removes them.
static IN_FLIGHT: LazyLock<Mutex<HashMap<PathBuf, Arc<Mutex<()>>>>> =
    LazyLock::new(|| Mutex::new(HashMap::new()));

/// Returns the mutex associated with `path`, registering a new one if the
/// path has not been seen before.
///
/// Every call with an equal path yields a handle to the same mutex, so
/// callers can serialize work on that path by locking it.
///
/// # Panics
///
/// Panics if the registry mutex has been poisoned.
fn in_flight_lock(path: &Path) -> Arc<Mutex<()>> {
    let mut registry = IN_FLIGHT.lock().expect("in_flight map mutex poisoned");
    if let Some(existing) = registry.get(path) {
        return Arc::clone(existing);
    }
    let fresh = Arc::new(Mutex::new(()));
    registry.insert(path.to_path_buf(), Arc::clone(&fresh));
    fresh
}
/// Replaces the process-wide cache handle with `file_cache`.
///
/// # Panics
///
/// Panics if the global cache mutex has been poisoned.
pub fn set_global_cache(file_cache: FileCache) {
    let mut slot = GLOBAL_CACHE.lock().unwrap();
    *slot = file_cache;
}
pub fn global_cache() -> FileCache {
GLOBAL_CACHE.lock().unwrap().clone()
}
/// Errors returned by [`FileCache`] operations.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum CacheError {
    /// A filesystem operation failed (wraps a [`std::io::Error`]).
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// Wraps a [`CloudError`].
    #[error(transparent)]
    Cloud(#[from] CloudError),
    /// Wraps a [`UriError`], e.g. when a URI has no key to derive a cache
    /// path from.
    // `transparent` instead of `"{0}"`: with `#[from]` the inner error is
    // already exposed through `source()`, so formatting it into this
    // variant's message as well makes error-chain reports print the same
    // text twice. This also matches the `Io` and `Cloud` variants.
    #[error(transparent)]
    Uri(#[from] UriError),
    /// The cache has no directory configured, so nothing can be stored.
    #[error("No cache has been set")]
    NoCacheDirectory,
}
/// Handle describing where cached files are stored on the local disk.
///
/// The handle only carries the (optional) cache directory, so cloning it is
/// cheap. The `Default` value has no directory configured.
#[derive(Debug, Clone, Default)]
pub struct FileCache {
    /// Root directory of the cache; `None` when no directory is configured.
    dir: Option<PathBuf>,
}
impl FileCache {
    /// Creates a cache rooted at `dir`, creating the directory (and any
    /// missing parents) first.
    ///
    /// # Errors
    ///
    /// Returns [`CacheError::Io`] if the directory cannot be created.
    pub fn persistent(
        dir: impl Into<std::path::PathBuf>,
    ) -> Result<Self, CacheError> {
        let dir = dir.into();
        std::fs::create_dir_all(&dir)?;
        Ok(FileCache { dir: Some(dir) })
    }

    /// Creates a cache rooted at the process-wide temporary directory.
    ///
    /// All caches created through this constructor share the same
    /// directory.
    ///
    /// # Panics
    ///
    /// Panics if the temporary directory cannot be created.
    pub fn temp() -> Self {
        FileCache {
            dir: Some(temp_dir().to_path_buf()),
        }
    }

    /// Creates a cache with no directory configured.
    ///
    /// With such a cache, [`cache`](Self::cache) fails with
    /// [`CacheError::NoCacheDirectory`] for non-local URIs,
    /// [`cached_local`](Self::cached_local) and
    /// [`local_path`](Self::local_path) return `None` for non-local URIs,
    /// and [`invalidate`](Self::invalidate) does nothing.
    pub fn none() -> Self {
        FileCache { dir: None }
    }

    /// Downloads (or passes through) `uri` into the cache directory.
    ///
    /// Local URIs are returned unchanged, whether or not a cache directory
    /// is configured. Other URIs are downloaded to a temporary `.tmp` file
    /// inside the cache directory and then renamed into place, and the
    /// resulting local URI is returned. If the cache path already exists it
    /// is reused without re-downloading.
    ///
    /// Concurrent calls for the same URI within a process are coalesced:
    /// only one download runs while the others wait and reuse the result.
    ///
    /// # Errors
    ///
    /// * [`CacheError::NoCacheDirectory`] if `uri` is not local and no cache
    ///   directory is configured.
    /// * [`CacheError::Uri`] if `uri` has no key.
    /// * Any error raised while constructing the cloud object, downloading
    ///   it, or renaming the temporary file. On a failed download or rename
    ///   the temporary file is removed on a best-effort basis.
    ///
    /// # Panics
    ///
    /// Panics if the in-flight registry or the per-path mutex is poisoned.
    pub fn cache(&self, uri: impl Into<Uri>) -> Result<Uri, CacheError> {
        let uri = uri.into();
        // Local pass-through comes first so it never depends on a cache
        // directory, matching `cached_local` and `local_path`.
        if uri.is_local() {
            return Ok(uri);
        }
        let dir = self.dir.as_ref().ok_or(CacheError::NoCacheDirectory)?;
        let key = uri
            .key()
            .ok_or_else(|| UriError::InvalidUri(uri.clone()))?;
        let cache_path = dir.join(key);

        // Fast path: already cached, no locking needed.
        if cache_path.exists() {
            return Ok(Uri::from(cache_path));
        }

        let lock = in_flight_lock(&cache_path);
        let _guard = lock.lock().expect("in_flight per-key mutex poisoned");
        // Re-check under the lock: another thread may have finished the
        // download while this one was waiting.
        if cache_path.exists() {
            return Ok(Uri::from(cache_path));
        }

        let tmp_path = dir.join(format!("{}.tmp", key));
        // Nothing has been written yet, so a failure here needs no cleanup.
        let obj = CloudObject::new(uri)?;
        if let Err(err) = obj.download_to(&tmp_path) {
            // Best effort: do not leave a partial download behind. The
            // download error is the one worth reporting, so a failure to
            // remove the temporary file is ignored.
            let _ = std::fs::remove_file(&tmp_path);
            return Err(err.into());
        }
        if let Err(err) = std::fs::rename(&tmp_path, &cache_path) {
            let _ = std::fs::remove_file(&tmp_path);
            return Err(err.into());
        }
        Ok(Uri::from(cache_path))
    }

    /// Returns the local cache URI for `uri` iff a cache file already exists.
    ///
    /// Never downloads and never returns an error. Local URIs pass through
    /// unchanged. For other URIs, returns `Some(local_uri)` only when the
    /// corresponding cache path is an existing regular file; returns `None`
    /// when it is not, when the URI has no key, or when no cache directory
    /// is configured.
    ///
    /// This is the cache-aware probe used by callers (such as path
    /// resolvers) that would otherwise issue cloud HEAD requests to check
    /// for file existence.
    pub fn cached_local(&self, uri: impl Into<Uri>) -> Option<Uri> {
        let uri = uri.into();
        if uri.is_local() {
            return Some(uri);
        }
        let dir = self.dir.as_ref()?;
        let key = uri.key()?;
        let cache_path = dir.join(key);
        if cache_path.is_file() {
            Some(Uri::from(cache_path))
        } else {
            None
        }
    }

    /// Returns the local cache URI for `uri` without checking existence.
    ///
    /// Like [`cached_local`](Self::cached_local) but never touches the
    /// filesystem. Local URIs pass through unchanged. For other URIs,
    /// returns the path the file *would* live at inside the cache directory
    /// (whether or not it has been downloaded yet), or `None` when the URI
    /// has no key or no cache directory is configured.
    pub fn local_path(&self, uri: impl Into<Uri>) -> Option<Uri> {
        let uri = uri.into();
        if uri.is_local() {
            return Some(uri);
        }
        let dir = self.dir.as_ref()?;
        let key = uri.key()?;
        Some(Uri::from(dir.join(key)))
    }

    /// Removes the cached copy of `uri`, if there is one.
    ///
    /// * With no cache directory configured, this does nothing.
    /// * A URI that has a local path is removed only when that path lies
    ///   inside the cache directory; other local paths are left alone.
    /// * A URI without a local path is mapped to its cache path via its key.
    ///
    /// A cache file that does not exist counts as already invalidated, so
    /// the call is idempotent.
    ///
    /// # Errors
    ///
    /// * [`CacheError::Uri`] if the URI has neither a local path nor a key.
    /// * [`CacheError::Io`] if removing the file fails for any reason other
    ///   than the file not existing.
    pub fn invalidate(&self, uri: impl Into<Uri>) -> Result<(), CacheError> {
        let Some(dir) = &self.dir else {
            return Ok(()); // No cache, nothing to invalidate
        };
        let uri = uri.into();
        let cache_path = match uri.as_path() {
            Some(p) if p.starts_with(dir) => p.to_path_buf(),
            Some(_) => return Ok(()), // Not in cache, nothing to invalidate
            None => {
                let key = uri
                    .key()
                    .ok_or_else(|| UriError::InvalidUri(uri.clone()))?;
                dir.join(key)
            }
        };
        match std::fs::remove_file(cache_path) {
            Ok(()) => Ok(()),
            // Never cached (or already removed): nothing to invalidate.
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
            Err(err) => Err(err.into()),
        }
    }
}
impl Uri {
    /// Caches this URI through the global cache.
    ///
    /// Takes a snapshot of the global cache and forwards to
    /// `FileCache::cache`, returning whatever that call returns.
    pub fn try_cache(&self) -> Result<Uri, CacheError> {
        crate::global_cache().cache(self.clone())
    }

    /// Caches this URI, falling back to a temp-directory cache.
    ///
    /// First tries [`try_cache`](Self::try_cache). If that fails for any
    /// reason, the attempt is repeated with `FileCache::temp()` and the
    /// outcome of that second attempt is returned.
    pub fn force_cache(&self) -> Result<Uri, CacheError> {
        self.try_cache()
            .or_else(|_| FileCache::temp().cache(self.clone()))
    }

    /// Caches this URI if possible; otherwise returns a clone of this URI.
    ///
    /// Any error from [`try_cache`](Self::try_cache) is discarded.
    pub fn soft_cache(&self) -> Uri {
        match self.try_cache() {
            Ok(local) => local,
            Err(_) => self.clone(),
        }
    }

    /// Returns the local cache URI iff this URI is already cached on disk.
    ///
    /// Forwards to `FileCache::cached_local` on a snapshot of the global
    /// cache: no download is attempted. Local URIs pass through unchanged.
    /// For cloud URIs, returns `Some(local_uri)` only when the cache file
    /// already exists; otherwise returns `None`.
    pub fn cached_local(&self) -> Option<Uri> {
        let cache = crate::global_cache();
        cache.cached_local(self.clone())
    }

    /// Returns the local-filesystem representation of this URI, without
    /// performing any network or existence checks.
    ///
    /// Local URIs are returned unchanged. Cloud URIs are mapped to the
    /// path they would occupy inside the global cache directory, whether or
    /// not the file has been downloaded. When no such path can be derived
    /// (no cache directory configured, or the URI has no key), a clone of
    /// this URI is returned instead.
    ///
    /// Useful as a fast pre-check: callers can probe the returned URI with
    /// local filesystem operations and only fall back to network access if
    /// those probes fail.
    pub fn local_representation(&self) -> Uri {
        match crate::global_cache().local_path(self.clone()) {
            Some(local) => local,
            None => self.clone(),
        }
    }
}