object_store_wasm/parse.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// NB: Replicated from object_store, modified to remove unsupported
// schemes. References adjusted where applicable.
20use object_store::path::Path;
21use object_store::ObjectStore;
22use snafu::Snafu;
23use url::Url;
24
25#[derive(Debug, Snafu)]
26enum Error {
27 #[snafu(display("Unable to convert URL \"{}\" to filesystem path", url))]
28 InvalidUrl { url: Url },
29
30 #[snafu(display("Unable to recognise URL \"{}\"", url))]
31 Unrecognised { url: Url },
32
33 #[snafu(display("Feature {scheme:?} not enabled"))]
34 NotEnabled { scheme: ObjectStoreScheme },
35
36 #[snafu(context(false))]
37 Path { source: object_store::path::Error },
38}
39
40impl From<Error> for object_store::Error {
41 fn from(e: Error) -> Self {
42 Self::Generic {
43 store: "URL",
44 source: Box::new(e),
45 }
46 }
47}
48
/// The kind of [`ObjectStore`] a URL refers to, as recognised from the URL's
/// scheme and host.
//
// The upstream `object_store` variants `Local`, `Memory`,
// `GoogleCloudStorage` and `MicrosoftAzure` are deliberately absent: this
// copy was modified to remove the schemes it does not support.
#[derive(PartialEq, Eq, Debug)]
enum ObjectStoreScheme {
    /// URL addressing an Amazon S3 (or S3-compatible) store
    AmazonS3,
    /// URL addressing a generic HTTP(S) store
    Http,
}
65
66impl ObjectStoreScheme {
67 /// Create an [`ObjectStoreScheme`] from the provided [`Url`]
68 ///
69 /// Returns the [`ObjectStoreScheme`] and the remaining [`Path`]
70 fn parse(url: &Url) -> Result<(Self, Path), Error> {
71 let strip_bucket = || Some(url.path().strip_prefix('/')?.split_once('/')?.1);
72
73 let (scheme, path) = match (url.scheme(), url.host_str()) {
74 // ("file", None) => (Self::Local, url.path()),
75 // ("memory", None) => (Self::Memory, url.path()),
76 ("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
77 // ("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
78 // ("az" | "adl" | "azure" | "abfs" | "abfss", Some(_)) => {
79 // (Self::MicrosoftAzure, url.path())
80 // }
81 ("http", Some(_)) => (Self::Http, url.path()),
82 ("https", Some(host)) => {
83 // if host.ends_with("dfs.core.windows.net")
84 // || host.ends_with("blob.core.windows.net")
85 // || host.ends_with("dfs.fabric.microsoft.com")
86 // || host.ends_with("blob.fabric.microsoft.com")
87 // {
88 // (Self::MicrosoftAzure, url.path())
89 // } else
90 if host.ends_with("amazonaws.com") {
91 match host.starts_with("s3") {
92 true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
93 false => (Self::AmazonS3, url.path()),
94 }
95 } else if host.ends_with("r2.cloudflarestorage.com") {
96 (Self::AmazonS3, strip_bucket().unwrap_or_default())
97 } else {
98 (Self::Http, url.path())
99 }
100 }
101 _ => return Err(Error::Unrecognised { url: url.clone() }),
102 };
103
104 Ok((scheme, Path::from_url_path(path)?))
105 }
106}
107
// Builds a boxed store from a builder type, a URL and `(key, value)` options.
//
// The builder is created with `new()`, seeded with the URL rendered as a
// string, and then receives every option whose key parses into the builder's
// configuration key type. Keys that fail to parse are skipped on purpose
// (best effort). A `build()` failure is propagated with `?`, so the macro can
// only be used inside a function returning a compatible `Result`.
macro_rules! builder_opts {
    ($builder:ty, $url:expr, $options:expr) => {{
        let mut builder = <$builder>::new().with_url($url.to_string());
        for (key, value) in $options {
            if let Ok(config_key) = key.as_ref().parse() {
                builder = builder.with_config(config_key, value);
            }
        }
        Box::new(builder.build()?) as _
    }};
}
120
121/// Create an [`ObjectStore`] based on the provided `url`
122///
123/// Returns
124/// - An [`ObjectStore`] of the corresponding type
125/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
126pub fn parse_url(url: &Url) -> Result<(Box<dyn ObjectStore>, Path), object_store::Error> {
127 parse_url_opts(url, std::iter::empty::<(&str, &str)>())
128}
129
130/// Create an [`ObjectStore`] based on the provided `url` and options
131///
132/// Returns
133/// - An [`ObjectStore`] of the corresponding type
134/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
135pub fn parse_url_opts<I, K, V>(
136 url: &Url,
137 options: I,
138) -> Result<(Box<dyn ObjectStore>, Path), object_store::Error>
139where
140 I: IntoIterator<Item = (K, V)>,
141 K: AsRef<str>,
142 V: Into<String>,
143{
144 let _options = options;
145 let (scheme, path) = ObjectStoreScheme::parse(url)?;
146 let path = Path::parse(path)?;
147
148 let store: Box<dyn ObjectStore> = match scheme {
149 #[cfg(feature = "aws")]
150 ObjectStoreScheme::AmazonS3 => {
151 builder_opts!(crate::aws::builder::AmazonS3Builder, url, _options)
152 }
153 #[cfg(feature = "http")]
154 ObjectStoreScheme::Http => {
155 let url = &url[..url::Position::BeforePath];
156 let parsed_url = Url::parse(url).unwrap();
157 Box::new(crate::http::HttpStore::new(parsed_url))
158 // builder_opts!(crate::http::HttpBuilder, url, _options)
159 }
160 #[cfg(not(all(feature = "aws", feature = "http")))]
161 s => {
162 return Err(object_store::Error::Generic {
163 store: "parse_url",
164 source: format!("feature for {s:?} not enabled").into(),
165 })
166 }
167 };
168
169 Ok((store, path))
170}