object_store_wasm/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// NB: Replicated from object_store, modified to remove unsupported
// schemes. References adjusted where applicable.
20use object_store::path::Path;
21use object_store::ObjectStore;
22use snafu::Snafu;
23use url::Url;
24
25#[derive(Debug, Snafu)]
26enum Error {
27    #[snafu(display("Unable to convert URL \"{}\" to filesystem path", url))]
28    InvalidUrl { url: Url },
29
30    #[snafu(display("Unable to recognise URL \"{}\"", url))]
31    Unrecognised { url: Url },
32
33    #[snafu(display("Feature {scheme:?} not enabled"))]
34    NotEnabled { scheme: ObjectStoreScheme },
35
36    #[snafu(context(false))]
37    Path { source: object_store::path::Error },
38}
39
40impl From<Error> for object_store::Error {
41    fn from(e: Error) -> Self {
42        Self::Generic {
43            store: "URL",
44            source: Box::new(e),
45        }
46    }
47}
48
/// The kind of [`ObjectStore`] that a URL addresses.
///
/// Only the schemes retained in this port are listed. The upstream
/// local-filesystem, in-memory, Google Cloud Storage and Microsoft Azure
/// variants are intentionally omitted.
#[derive(Debug, PartialEq, Eq)]
enum ObjectStoreScheme {
    /// URL handled by the Amazon S3 store (also used for Cloudflare R2 hosts).
    AmazonS3,
    /// URL handled by the generic HTTP store.
    Http,
}
65
66impl ObjectStoreScheme {
67    /// Create an [`ObjectStoreScheme`] from the provided [`Url`]
68    ///
69    /// Returns the [`ObjectStoreScheme`] and the remaining [`Path`]
70    fn parse(url: &Url) -> Result<(Self, Path), Error> {
71        let strip_bucket = || Some(url.path().strip_prefix('/')?.split_once('/')?.1);
72
73        let (scheme, path) = match (url.scheme(), url.host_str()) {
74            // ("file", None) => (Self::Local, url.path()),
75            // ("memory", None) => (Self::Memory, url.path()),
76            ("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
77            // ("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
78            // ("az" | "adl" | "azure" | "abfs" | "abfss", Some(_)) => {
79            //     (Self::MicrosoftAzure, url.path())
80            // }
81            ("http", Some(_)) => (Self::Http, url.path()),
82            ("https", Some(host)) => {
83                // if host.ends_with("dfs.core.windows.net")
84                //     || host.ends_with("blob.core.windows.net")
85                //     || host.ends_with("dfs.fabric.microsoft.com")
86                //     || host.ends_with("blob.fabric.microsoft.com")
87                // {
88                //     (Self::MicrosoftAzure, url.path())
89                // } else
90                if host.ends_with("amazonaws.com") {
91                    match host.starts_with("s3") {
92                        true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
93                        false => (Self::AmazonS3, url.path()),
94                    }
95                } else if host.ends_with("r2.cloudflarestorage.com") {
96                    (Self::AmazonS3, strip_bucket().unwrap_or_default())
97                } else {
98                    (Self::Http, url.path())
99                }
100            }
101            _ => return Err(Error::Unrecognised { url: url.clone() }),
102        };
103
104        Ok((scheme, Path::from_url_path(path)?))
105    }
106}
107
/// Builds a boxed store from a builder type, a URL and key/value options.
///
/// The builder is seeded with `$url`, then each option whose key parses
/// into the builder's config-key type is applied in iteration order.
/// Keys that fail to parse are skipped. A failing `build()` is propagated
/// with `?`, so the macro must be used inside a function returning a
/// compatible `Result`.
macro_rules! builder_opts {
    ($builder:ty, $url:expr, $options:expr) => {{
        let options = $options.into_iter();
        let mut builder = <$builder>::new().with_url($url.to_string());
        for (key, value) in options {
            // Unknown keys are ignored on purpose (best-effort configuration).
            if let Ok(config_key) = key.as_ref().parse() {
                builder = builder.with_config(config_key, value);
            }
        }
        Box::new(builder.build()?) as _
    }};
}
120
121/// Create an [`ObjectStore`] based on the provided `url`
122///
123/// Returns
124/// - An [`ObjectStore`] of the corresponding type
125/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
126pub fn parse_url(url: &Url) -> Result<(Box<dyn ObjectStore>, Path), object_store::Error> {
127    parse_url_opts(url, std::iter::empty::<(&str, &str)>())
128}
129
130/// Create an [`ObjectStore`] based on the provided `url` and options
131///
132/// Returns
133/// - An [`ObjectStore`] of the corresponding type
134/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
135pub fn parse_url_opts<I, K, V>(
136    url: &Url,
137    options: I,
138) -> Result<(Box<dyn ObjectStore>, Path), object_store::Error>
139where
140    I: IntoIterator<Item = (K, V)>,
141    K: AsRef<str>,
142    V: Into<String>,
143{
144    let _options = options;
145    let (scheme, path) = ObjectStoreScheme::parse(url)?;
146    let path = Path::parse(path)?;
147
148    let store: Box<dyn ObjectStore> = match scheme {
149        #[cfg(feature = "aws")]
150        ObjectStoreScheme::AmazonS3 => {
151            builder_opts!(crate::aws::builder::AmazonS3Builder, url, _options)
152        }
153        #[cfg(feature = "http")]
154        ObjectStoreScheme::Http => {
155            let url = &url[..url::Position::BeforePath];
156            let parsed_url = Url::parse(url).unwrap();
157            Box::new(crate::http::HttpStore::new(parsed_url))
158            // builder_opts!(crate::http::HttpBuilder, url, _options)
159        }
160        #[cfg(not(all(feature = "aws", feature = "http")))]
161        s => {
162            return Err(object_store::Error::Generic {
163                store: "parse_url",
164                source: format!("feature for {s:?} not enabled").into(),
165            })
166        }
167    };
168
169    Ok((store, path))
170}