Skip to main content

imferno_core/storage/
mod.rs

1//! Storage abstraction for IMF package I/O.
2//!
3//! Provides the [`Storage`] trait and built-in implementations for
4//! local filesystem ([`fs::FsStorage`]) and S3 ([`s3::S3Storage`],
5//! behind the `aws-s3` feature flag).
6
7pub mod fs;
8
9#[cfg(feature = "aws-s3")]
10pub mod s3;
11
12/// Parsed storage URI.
13///
14/// Recognized schemes: `file`, `s3`. Bare paths (no scheme) are normalized
15/// to `file://` URIs.
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct StorageUri {
18    pub scheme: Scheme,
19    /// For `file`: an absolute or relative filesystem path.
20    /// For `s3`: the key prefix (no leading `/`).
21    pub path: String,
22    /// For `s3` only: the bucket name.
23    pub bucket: Option<String>,
24}
25
/// The storage scheme of a [`StorageUri`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scheme {
    /// Local filesystem (`file://` URIs and bare paths).
    File,
    /// S3 object storage (`s3://bucket/key` URIs).
    S3,
}
31
32impl StorageUri {
33    /// Parse a URI string. Bare paths become `file://` URIs.
34    pub fn parse(input: &str) -> Result<Self, StorageError> {
35        // Bare paths (no scheme://) — treat as filesystem
36        if !input.contains("://") {
37            return Ok(StorageUri {
38                scheme: Scheme::File,
39                path: input.to_string(),
40                bucket: None,
41            });
42        }
43
44        let parsed = url::Url::parse(input)
45            .map_err(|e| StorageError::InvalidUri(format!("{input}: {e}")))?;
46
47        match parsed.scheme() {
48            "file" => Ok(StorageUri {
49                scheme: Scheme::File,
50                path: parsed.path().to_string(),
51                bucket: None,
52            }),
53            "s3" => {
54                let bucket = parsed
55                    .host_str()
56                    .ok_or_else(|| {
57                        StorageError::InvalidUri(format!("s3 URI missing bucket: {input}"))
58                    })?
59                    .to_string();
60                // url crate prefixes path with /, drop it for S3 key prefix semantics
61                let path = parsed.path().trim_start_matches('/').to_string();
62                Ok(StorageUri {
63                    scheme: Scheme::S3,
64                    path,
65                    bucket: Some(bucket),
66                })
67            }
68            other => Err(StorageError::UnsupportedScheme(other.to_string())),
69        }
70    }
71}
72
73/// Synchronous abstraction over a storage backend.
74///
75/// Implementations expose the operations needed by the IMF package reader:
76/// listing entries under a URI and reading their full contents as UTF-8 strings.
77/// Range reads are intentionally absent from v1.0 — the validator currently
78/// loads full XML files and reads MXF header partitions through the local FS.
79/// Range support will be added when MXF reading is migrated to the trait.
80///
81/// All methods are sync. Async backends (e.g. S3) wrap a tokio runtime
82/// internally so callers do not need an async context.
83pub trait Storage: Send + Sync {
84    /// List entries at the given URI.
85    ///
86    /// For `file://`, this is non-recursive `read_dir`. For `s3://`, this is
87    /// `ListObjectsV2` with the URI's path as `prefix`.
88    fn list(&self, uri: &StorageUri) -> Result<Vec<Entry>, StorageError>;
89
90    /// Read a file's full contents as a UTF-8 string.
91    ///
92    /// Files that fail UTF-8 decoding return an error; callers (e.g. the
93    /// XML-only filter in `read_xml_files`) should skip them gracefully.
94    fn read_to_string(&self, uri: &StorageUri) -> Result<String, StorageError>;
95}
96
/// One item produced by [`Storage::list`].
///
/// The `uri` field holds the backend's own identifier for the item:
///
/// * `FsStorage` — a bare absolute path string with no `file://` prefix,
///   which keeps backward compatibility with `package::read_dir`.
/// * `S3Storage` — an `s3://bucket/key` URI.
#[derive(Debug, Clone)]
pub struct Entry {
    /// Backend-specific identifier of the entry (see the type-level docs).
    pub uri: String,
    /// Size reported by the backend — presumably in bytes; confirm against
    /// the backend implementations.
    pub size: u64,
    /// Whether the entry is a file — presumably `false` for directories or
    /// key prefixes; confirm against the backend implementations.
    pub is_file: bool,
}
109
110#[derive(thiserror::Error, Debug)]
111pub enum StorageError {
112    #[error("invalid URI: {0}")]
113    InvalidUri(String),
114    #[error("unsupported scheme: {0}")]
115    UnsupportedScheme(String),
116    #[error("IO: {0}")]
117    Io(#[from] std::io::Error),
118    #[error("backend error: {0}")]
119    Backend(String),
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// A `file://` URI yields the `File` scheme, its path, and no bucket.
    #[test]
    fn parse_file_uri() {
        let expected = StorageUri {
            scheme: Scheme::File,
            path: "/tmp/imp".to_string(),
            bucket: None,
        };
        assert_eq!(StorageUri::parse("file:///tmp/imp").unwrap(), expected);
    }

    /// An `s3://` URI splits into bucket and key prefix (no leading `/`).
    #[test]
    fn parse_s3_uri() {
        let expected = StorageUri {
            scheme: Scheme::S3,
            path: "path/to/imp/".to_string(),
            bucket: Some("my-bucket".to_string()),
        };
        assert_eq!(
            StorageUri::parse("s3://my-bucket/path/to/imp/").unwrap(),
            expected
        );
    }

    /// Input without a scheme is accepted as a filesystem path.
    #[test]
    fn parse_bare_path_becomes_file_uri() {
        let StorageUri { scheme, path, .. } = StorageUri::parse("/tmp/imp").unwrap();
        assert_eq!(scheme, Scheme::File);
        assert_eq!(path, "/tmp/imp");
    }

    /// Schemes other than `file` and `s3` are rejected.
    #[test]
    fn parse_unsupported_scheme_errors() {
        let outcome = StorageUri::parse("ftp://example.com/imp");
        assert!(matches!(outcome, Err(StorageError::UnsupportedScheme(_))));
    }
}
154}