use std::fmt;
use std::path::Path;
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use super::error::StorageError;
/// Storage backend identified by the scheme portion of a storage URL.
///
/// With `#[serde(rename_all = "lowercase")]` each variant is (de)serialised
/// under its lowercased variant name, so `Gcs` is `"gcs"` in serialised form
/// even though its `Display` output is `"gs"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Scheme {
/// `file://` URLs; bare paths handed to `StorageUrl::parse` also end up here.
File,
/// `memory://` URLs, as produced by `StorageUrl::memory`.
Memory,
/// `s3://` URLs.
S3,
/// `gs://` or `gcs://` URLs; displayed as `gs`.
Gcs,
/// `azure://` or `abfss://` URLs; displayed as `azure`.
Azure,
/// `r2://` URLs.
R2,
}
impl fmt::Display for Scheme {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
Self::File => "file",
Self::Memory => "memory",
Self::S3 => "s3",
Self::Gcs => "gs",
Self::Azure => "azure",
Self::R2 => "r2",
};
f.write_str(s)
}
}
/// A storage location held as a single `scheme://path` string.
///
/// Values created through `StorageUrl::parse` or `StorageUrl::memory` always
/// start with a recognised scheme followed by `://`.
///
/// NOTE(review): because of `#[serde(transparent)]`, the derived `Deserialize`
/// reads the inner string directly and does not go through `parse`, so a
/// deserialised value is not validated here — confirm that inputs are trusted,
/// since `StorageUrl::scheme` panics on an unrecognised scheme.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct StorageUrl(String);
impl StorageUrl {
    /// Parses `input` into a [`StorageUrl`].
    ///
    /// Two input shapes are accepted:
    ///
    /// * `scheme://rest` — the text before the first `://` must be one of
    ///   `file`, `memory`, `s3`, `gs`/`gcs`, `azure`/`abfss` or `r2`; the
    ///   input is then stored verbatim.
    /// * anything else — treated as a bare filesystem path. Backslashes are
    ///   rewritten to `/`, and the result is prefixed with `file://` when it
    ///   starts with `/`, or with `file://./` otherwise.
    ///
    /// # Errors
    ///
    /// Returns [`StorageError::InvalidUrl`] when `input` is empty or when its
    /// scheme is not recognised. In both cases the error's `input` field
    /// carries the complete string that was passed in.
    pub fn parse(input: &str) -> Result<Self, StorageError> {
        if input.is_empty() {
            return Err(StorageError::InvalidUrl {
                input: input.to_string(),
                reason: "URL is empty".into(),
            });
        }

        if let Some((scheme_str, _)) = input.split_once("://") {
            // Build the error here rather than via `parse_scheme` so that it
            // names the whole rejected URL instead of just the scheme fragment.
            if Self::lookup_scheme(scheme_str).is_none() {
                return Err(Self::unknown_scheme_error(input, scheme_str));
            }
            return Ok(Self(input.to_string()));
        }

        // No `://` present: interpret the input as a local path.
        let path = Path::new(input);
        let normalised = path.to_string_lossy().replace('\\', "/");
        let url = if normalised.starts_with('/') {
            format!("file://{normalised}")
        } else {
            format!("file://./{normalised}")
        };
        Ok(Self(url))
    }

    /// Builds a `memory:///…` URL for `virtual_path`.
    ///
    /// Every leading `/` of `virtual_path` is stripped first, so the result
    /// always has exactly three slashes after `memory:`.
    pub fn memory(virtual_path: &str) -> Self {
        let trimmed = virtual_path.trim_start_matches('/');
        Self(format!("memory:///{trimmed}"))
    }

    /// Returns the full URL text, scheme included.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Returns the [`Scheme`] named by the text before the first `://`.
    ///
    /// # Panics
    ///
    /// Panics if the stored string has no recognised scheme. Values built via
    /// [`StorageUrl::parse`] or [`StorageUrl::memory`] never trigger this; a
    /// value that was populated without going through them (for example by
    /// the derived `Deserialize`) can.
    pub fn scheme(&self) -> Scheme {
        let prefix = self.0.split_once("://").map_or("", |(scheme, _)| scheme);
        Self::parse_scheme(prefix).expect("StorageUrl invariant: scheme always valid")
    }

    /// Returns everything after the first `://`, or `""` when the stored
    /// string contains no `://`.
    pub fn path(&self) -> &str {
        self.0.split_once("://").map_or("", |(_, rest)| rest)
    }

    /// Maps scheme text (without `://`) to a [`Scheme`].
    ///
    /// # Errors
    ///
    /// Returns [`StorageError::InvalidUrl`] with `input` set to `s` when the
    /// scheme is not recognised.
    fn parse_scheme(s: &str) -> Result<Scheme, StorageError> {
        Self::lookup_scheme(s).ok_or_else(|| Self::unknown_scheme_error(s, s))
    }

    /// Looks up scheme text (without `://`); matching is case-sensitive.
    fn lookup_scheme(s: &str) -> Option<Scheme> {
        match s {
            "file" => Some(Scheme::File),
            "memory" => Some(Scheme::Memory),
            "s3" => Some(Scheme::S3),
            "gs" | "gcs" => Some(Scheme::Gcs),
            "azure" | "abfss" => Some(Scheme::Azure),
            "r2" => Some(Scheme::R2),
            _ => None,
        }
    }

    /// Builds the error reported for an unrecognised `scheme`, attributing it
    /// to `input`.
    fn unknown_scheme_error(input: &str, scheme: &str) -> StorageError {
        StorageError::InvalidUrl {
            input: input.to_string(),
            reason: format!("unknown scheme '{scheme}://'"),
        }
    }
}
impl fmt::Display for StorageUrl {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
impl FromStr for StorageUrl {
type Err = StorageError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking when it is rejected.
    fn parsed(input: &str) -> StorageUrl {
        StorageUrl::parse(input).unwrap()
    }

    /// Explicit `file://` URLs keep their absolute path.
    #[test]
    fn parses_file_url() {
        let url = parsed("file:///tmp/data.parquet");
        assert_eq!(url.scheme(), Scheme::File);
        assert_eq!(url.path(), "/tmp/data.parquet");
    }

    /// `s3://` URLs expose everything after the scheme as the path.
    #[test]
    fn parses_s3_url() {
        let url = parsed("s3://benchmarks/snapshots/2026.parquet");
        assert_eq!(url.scheme(), Scheme::S3);
        assert_eq!(url.path(), "benchmarks/snapshots/2026.parquet");
    }

    /// `gs://` maps to the `Gcs` variant.
    #[test]
    fn parses_gs_url() {
        let url = parsed("gs://archives/2026/jan.parquet");
        assert_eq!(url.scheme(), Scheme::Gcs);
    }

    /// `azure://` maps to the `Azure` variant.
    #[test]
    fn parses_azure_url() {
        let url = parsed("azure://archives/snapshots/x.parquet");
        assert_eq!(url.scheme(), Scheme::Azure);
    }

    /// `r2://` URLs report both scheme and path.
    #[test]
    fn parses_r2_url() {
        let url = parsed("r2://archives/2026/jan.parquet");
        assert_eq!(url.scheme(), Scheme::R2);
        assert_eq!(url.path(), "archives/2026/jan.parquet");
    }

    /// `Scheme::R2` displays as the same text that `parse` accepts.
    #[test]
    fn r2_scheme_round_trips_through_display() {
        assert_eq!(Scheme::R2.to_string(), "r2");
    }

    /// A bare absolute path is promoted to a `file://` URL.
    #[test]
    fn bare_absolute_path_becomes_file_url() {
        let url = parsed("/tmp/foo.parquet");
        assert_eq!(url.scheme(), Scheme::File);
        assert_eq!(url.path(), "/tmp/foo.parquet");
    }

    /// A bare relative path is promoted to a `file://` URL as well.
    #[test]
    fn bare_relative_path_becomes_file_url() {
        let url = parsed("data/triplets.parquet");
        assert_eq!(url.scheme(), Scheme::File);
        assert!(url.path().ends_with("data/triplets.parquet"));
    }

    /// Schemes outside the supported set are rejected.
    #[test]
    fn unknown_scheme_rejected() {
        let outcome = StorageUrl::parse("ftp://host/foo");
        assert!(matches!(outcome, Err(StorageError::InvalidUrl { .. })));
    }

    /// The empty string is rejected.
    #[test]
    fn empty_input_rejected() {
        let outcome = StorageUrl::parse("");
        assert!(matches!(outcome, Err(StorageError::InvalidUrl { .. })));
    }
}