Skip to main content

git_remote_object_store/object_store/azure/
auth.rs

1//! Credential resolution and the shared-key / SAS signing policies for
2//! the Azure Blob backend.
3//!
4//! The official `azure_storage_blob` 0.12 crate accepts only
5//! `Arc<dyn TokenCredential>` (Entra ID) on its constructors, but the
6//! Azurite emulator and many production accounts still authenticate
7//! with shared keys. We bridge the gap with a custom per-try
8//! [`Policy`] that signs each outgoing request using the Azure
9//! Storage shared-key v2 scheme. Tracking issue:
10//! `Azure/azure-sdk-for-rust#2975`.
11//!
12//! Resolution order:
13//!
14//! 1. URL flag `?credential=<NAME>` →
15//!    - `AZSTORE_<NAME>_KEY` (base64 account key) → [`SharedKeySigningPolicy`]
16//!    - `AZSTORE_<NAME>_CONNECTION_STRING` → parsed for `AccountKey=`
17//!      → [`SharedKeySigningPolicy`]
18//!    - `AZSTORE_<NAME>_SAS` → [`SasSigningPolicy`] (appends SAS query
19//!      params to every outgoing request URL)
20//! 2. No flag → [`azure_identity::DeveloperToolsCredential`].
21//!
22//! The shared-key signing implementation here is derived from the
23//! reference workaround posted on issue #2975, which itself was
24//! airlifted from the legacy `azure_storage` SDK. The
25//! string-to-sign / canonicalised-resource layout is documented at
26//! <https://learn.microsoft.com/en-us/rest/api/storageservices/authorize-with-shared-key>.
27
28use std::borrow::Cow;
29use std::collections::BTreeMap;
30use std::env;
31use std::sync::Arc;
32
33use async_trait::async_trait;
34use azure_core::credentials::TokenCredential;
35use azure_core::http::Method;
36use azure_core::http::headers::{HeaderName, Headers};
37use azure_core::http::policies::{Policy, PolicyResult};
38use azure_core::http::{Context, Request};
39use azure_identity::DeveloperToolsCredential;
40use base64::Engine;
41use base64::engine::general_purpose::STANDARD as BASE64;
42use hmac::{Hmac, Mac};
43use sha2::Sha256;
44use time::OffsetDateTime;
45use time::format_description::BorrowedFormatItem;
46use time::macros::format_description;
47use url::Url;
48
49use crate::object_store::ObjectStoreError;
50use crate::object_store::error::other_boxed;
51use crate::url::RemoteFlags;
52
/// RFC 1123 date format Azure requires for `x-ms-date` — the
/// `Sun, 06 Nov 1994 08:49:37 GMT` shape documented at
/// <https://learn.microsoft.com/en-us/rest/api/storageservices/representation-of-date-time-values-in-headers>.
///
/// The layout is spelled out component by component (short weekday,
/// zero-padded day, short month, year, zero-padded `HH:MM:SS`, then a
/// literal `GMT`) so that Azure-auth correctness does not depend on
/// the exact byte emission of any well-known formatter in the `time`
/// crate. An earlier revision formatted via `Rfc2822` and
/// string-replaced `+0000` → `GMT`, which would silently produce
/// malformed headers if `time` ever changed its RFC 2822 layout
/// (e.g., to `+00:00`).
///
/// The trailing `GMT` is a literal, not a formatted offset: callers
/// must format a UTC timestamp for the header to be truthful.
const X_MS_DATE_FORMAT: &[BorrowedFormatItem<'_>] = format_description!(
    "[weekday repr:short], [day padding:zero] [month repr:short] [year] \
     [hour padding:zero]:[minute padding:zero]:[second padding:zero] GMT"
);
66
/// Outcome of [`resolve`]. The constructors in this module populate
/// exactly one of (`token_credential`, `per_try_policy`): the Entra-ID
/// path sets the former, every `?credential=` alias path sets the
/// latter.
///
/// `sas_signing_key` is populated only for the shared-key /
/// connection-string paths so that the `bundle-uri` capability
/// (issue #76) can derive per-blob service SAS tokens; the SAS
/// env-var path and the Entra-ID path leave it `None`, in which case
/// `presigned_get_url` returns
/// [`crate::object_store::ObjectStoreError::Unsupported`].
pub(crate) struct ResolvedCredentials {
    /// Entra ID credential, used when no `?credential=` alias is set.
    pub token_credential: Option<Arc<dyn TokenCredential>>,
    /// Per-try signing policy (shared-key or SAS), used when a
    /// `?credential=` alias resolves to an env-var-provided key.
    pub per_try_policy: Option<Arc<dyn Policy>>,
    /// Account name + decoded storage key, when the credential alias
    /// resolves to a shared key (KEY env var or connection string).
    /// Held alongside the per-try policy so callers that need to
    /// sign things outside the request pipeline (service-SAS for
    /// `bundle-uri` presigned URLs) don't have to re-walk the env
    /// vars. `None` for SAS / Entra-ID paths.
    pub sas_signing_key: Option<SasSigningKey>,
}
88
/// Material required to sign a service-blob SAS token (issue #76).
/// Carries the storage key as pre-decoded [`HmacKey`] bytes — the
/// base64 decode happens at credential-resolution time rather
/// than per SAS-URL build.
///
/// `Debug` is derived: the inner [`HmacKey`] redacts its own bytes,
/// so the default field-walking impl already produces safe output
/// (the account name is printed, the key is not).
#[derive(Clone, Debug)]
pub(crate) struct SasSigningKey {
    /// Storage account name the key belongs to.
    pub account: String,
    /// Decoded storage account key used as the HMAC-SHA256 key.
    pub key: HmacKey,
}
101
102/// Pre-decoded HMAC-SHA256 key bytes for Azure shared-key /
103/// service-SAS signing. The base64 decode happens once at
104/// construction (in [`SharedKeySigningPolicy::new`] and the
105/// `parse_connection_string` paths) instead of per request.
106///
107/// The bytes themselves are an Azure storage account key — leaking
108/// them via `Debug` or log output would give full storage-account
109/// access — so the manual `Debug` impl redacts. The underlying
110/// `Vec<u8>` is not zeroized on drop; the existing `Secret`-based
111/// design did not zeroize either, so this is a parity choice
112/// rather than a regression (adding `zeroize` would mean a new
113/// dependency).
114#[derive(Clone)]
115pub struct HmacKey {
116    bytes: Vec<u8>,
117}
118
119impl HmacKey {
120    /// Decode `key_b64` and return an [`HmacKey`]. Surfaces the
121    /// decoding failure inline so the malformed-key error fires at
122    /// credential-resolution time rather than at first request.
123    ///
124    /// # Errors
125    ///
126    /// Returns [`ObjectStoreError::Other`] if `key_b64` is not valid
127    /// base64.
128    pub fn from_base64(key_b64: &str) -> Result<Self, ObjectStoreError> {
129        let bytes = BASE64.decode(key_b64.as_bytes()).map_err(|e| {
130            ObjectStoreError::Other(format!("AccountKey is not valid base64: {e}").into())
131        })?;
132        Ok(Self { bytes })
133    }
134
135    /// Raw HMAC key bytes. Use only inside the signing primitives.
136    fn as_bytes(&self) -> &[u8] {
137        &self.bytes
138    }
139}
140
141impl std::fmt::Debug for HmacKey {
142    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143        f.debug_struct("HmacKey")
144            .field("bytes", &"<redacted>")
145            .finish()
146    }
147}
148
149/// Resolve credentials for a parsed Azure URL.
150pub(crate) fn resolve(
151    account: &str,
152    flags: &RemoteFlags,
153) -> Result<ResolvedCredentials, ObjectStoreError> {
154    if let Some(alias) = flags.credential.as_deref() {
155        return resolve_alias(account, alias);
156    }
157    let cred = DeveloperToolsCredential::new(None).map_err(other_boxed)?;
158    Ok(ResolvedCredentials {
159        token_credential: Some(cred),
160        per_try_policy: None,
161        sas_signing_key: None,
162    })
163}
164
165fn resolve_alias(account: &str, alias: &str) -> Result<ResolvedCredentials, ObjectStoreError> {
166    if !is_valid_alias(alias) {
167        return Err(ObjectStoreError::Other(
168            format!(
169                "invalid credential alias `{alias}`: \
170                 must match [A-Za-z0-9_]+ (used to build env var names)"
171            )
172            .into(),
173        ));
174    }
175    let upper = alias.to_ascii_uppercase();
176    let key_var = format!("AZSTORE_{upper}_KEY");
177    let conn_var = format!("AZSTORE_{upper}_CONNECTION_STRING");
178    let sas_var = format!("AZSTORE_{upper}_SAS");
179
180    if let Some(key_b64) = lookup_env(&key_var)? {
181        let policy = SharedKeySigningPolicy::new(account, &key_b64)?;
182        let key = HmacKey::from_base64(&key_b64)?;
183        return Ok(resolved(
184            Arc::new(policy),
185            Some(SasSigningKey {
186                account: account.to_owned(),
187                key,
188            }),
189        ));
190    }
191    if let Some(conn) = lookup_env(&conn_var)? {
192        let parsed = parse_connection_string(&conn)?;
193        let policy = SharedKeySigningPolicy::new(&parsed.account, &parsed.key_b64)?;
194        let key = HmacKey::from_base64(&parsed.key_b64)?;
195        return Ok(resolved(
196            Arc::new(policy),
197            Some(SasSigningKey {
198                account: parsed.account,
199                key,
200            }),
201        ));
202    }
203    if let Some(sas) = lookup_env(&sas_var)? {
204        let policy = SasSigningPolicy::new(&sas)?;
205        // SAS-env-var path has no storage key, so we cannot derive
206        // a fresh per-blob SAS for `bundle-uri` presigning. Pass
207        // `None` to make the missing key explicit at the call site.
208        return Ok(resolved(Arc::new(policy), None));
209    }
210
211    Err(ObjectStoreError::Other(
212        format!(
213            "credential alias `{alias}` has no env var set: \
214             expected {key_var}, {conn_var}, or {sas_var}"
215        )
216        .into(),
217    ))
218}
219
220/// Read a credential-chain env var, distinguishing "not set" from
221/// "set to non-UTF-8 bytes". Returning `Ok(None)` for `NotPresent`
222/// keeps the chain walking; returning `Err(...)` for `NotUnicode`
223/// surfaces the misconfiguration with the offending variable name
224/// so the operator does not chase a "no env var set" message when
225/// the var actually was set.
226fn lookup_env(name: &str) -> Result<Option<String>, ObjectStoreError> {
227    match env::var(name) {
228        Ok(v) => Ok(Some(v)),
229        Err(env::VarError::NotPresent) => Ok(None),
230        Err(env::VarError::NotUnicode(_)) => Err(ObjectStoreError::Other(
231            format!("env var `{name}` is set but its value is not valid UTF-8").into(),
232        )),
233    }
234}
235
236/// Build a [`ResolvedCredentials`] from a per-try signing policy
237/// plus an optional SAS-signing key. The two `Option<SasSigningKey>`
238/// states make the single distinction between alias paths explicit
239/// at the call site: `Some(...)` for shared-key / connection-string
240/// (presigning is reachable), `None` for SAS-env-var (presigning
241/// returns `Unsupported`).
242fn resolved(
243    policy: Arc<dyn Policy>,
244    sas_signing_key: Option<SasSigningKey>,
245) -> ResolvedCredentials {
246    ResolvedCredentials {
247        token_credential: None,
248        per_try_policy: Some(policy),
249        sas_signing_key,
250    }
251}
252
/// A credential alias is valid when it is 1–64 bytes long and made
/// up solely of ASCII letters, ASCII digits, and `_` — i.e. safe to
/// splice into an environment variable name.
fn is_valid_alias(s: &str) -> bool {
    if !matches!(s.len(), 1..=64) {
        return false;
    }
    s.bytes()
        .all(|byte| matches!(byte, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_'))
}
256
/// Parsed `AccountName` / `AccountKey` from an Azure connection string.
///
/// `key_b64` is the base64 storage account key, i.e. a secret with
/// full storage-account access. `Debug` is therefore implemented by
/// hand and redacts it, so that logging the struct or a failed
/// `unwrap_err()` on a `Result` holding it cannot print the key.
pub(crate) struct ConnectionStringParts {
    /// Value of the `AccountName` field.
    pub account: String,
    /// Value of the `AccountKey` field (still base64-encoded).
    pub key_b64: String,
}

impl std::fmt::Debug for ConnectionStringParts {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ConnectionStringParts")
            .field("account", &self.account)
            .field("key_b64", &"<redacted>")
            .finish()
    }
}
263
264/// Parse the Azure connection-string format documented at
265/// <https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string>.
266///
267/// Only `AccountName` and `AccountKey` are required; other fields
268/// (`DefaultEndpointsProtocol`, `BlobEndpoint`, ...) are accepted but
269/// ignored. The endpoint URL is taken from the parsed `RemoteUrl`,
270/// not from the connection string, so the URL is the single source
271/// of truth for endpoint/host/port.
272pub(crate) fn parse_connection_string(
273    input: &str,
274) -> Result<ConnectionStringParts, ObjectStoreError> {
275    let mut account = None;
276    let mut key_b64 = None;
277    for segment in input.split(';') {
278        let segment = segment.trim();
279        if segment.is_empty() {
280            continue;
281        }
282        // Surface malformed segments instead of silently skipping —
283        // a typo like `AccountKeyy=...` would otherwise be ignored
284        // and reported as "missing AccountKey", which sends the user
285        // chasing the wrong field.
286        let Some((k, v)) = segment.split_once('=') else {
287            return Err(ObjectStoreError::Other(
288                format!("connection string segment `{segment}` is missing `=`").into(),
289            ));
290        };
291        match k {
292            "AccountName" => account = Some(v.to_owned()),
293            "AccountKey" => key_b64 = Some(v.to_owned()),
294            // Tolerate every other documented field (BlobEndpoint,
295            // DefaultEndpointsProtocol, EndpointSuffix, ...) without
296            // demanding we know each one — the URL itself is the
297            // authoritative endpoint source.
298            _ => {}
299        }
300    }
301    let account = account
302        .ok_or_else(|| ObjectStoreError::Other("connection string missing AccountName".into()))?;
303    let key_b64 = key_b64
304        .ok_or_else(|| ObjectStoreError::Other("connection string missing AccountKey".into()))?;
305    Ok(ConnectionStringParts { account, key_b64 })
306}
307
308// ---------------------------------------------------------------------------
309// SharedKeySigningPolicy
310// ---------------------------------------------------------------------------
311
312/// Per-try policy that signs every outgoing request with the Azure
313/// Storage shared-key v2 scheme.
314#[derive(Debug)]
315pub(crate) struct SharedKeySigningPolicy {
316    account: String,
317    key: HmacKey,
318}
319
320impl SharedKeySigningPolicy {
321    pub(crate) fn new(account: &str, key_b64: &str) -> Result<Self, ObjectStoreError> {
322        // Validate base64-decodability up front so a malformed key
323        // surfaces at construction, not on the first request. The
324        // decoded bytes are cached so subsequent signs avoid the
325        // per-request decode cost.
326        let key = HmacKey::from_base64(key_b64)?;
327        Ok(Self {
328            account: account.to_owned(),
329            key,
330        })
331    }
332}
333
334#[async_trait]
335impl Policy for SharedKeySigningPolicy {
336    async fn send(
337        &self,
338        ctx: &Context,
339        request: &mut Request,
340        next: &[Arc<dyn Policy>],
341    ) -> PolicyResult {
342        // Stamp x-ms-date so signing has a stable canonicalised header
343        // value. The SDK's date policy sometimes injects a regular
344        // `Date` header instead; `x-ms-date` takes precedence per
345        // the Azure spec.
346        let now = OffsetDateTime::now_utc();
347        let date = now.format(&X_MS_DATE_FORMAT).map_err(|e| {
348            azure_core::Error::with_message(
349                azure_core::error::ErrorKind::Other,
350                format!("failed to format x-ms-date: {e}"),
351            )
352        })?;
353        request.insert_header(HeaderName::from_static("x-ms-date"), date);
354
355        let method = request.method();
356        let url = request.url().clone();
357        let content_length = request_content_length(request);
358        let auth = compute_authorization(
359            &self.account,
360            &self.key,
361            method,
362            &url,
363            request.headers(),
364            content_length,
365        )
366        .map_err(|e| {
367            azure_core::Error::with_message(
368                azure_core::error::ErrorKind::Other,
369                format!("shared-key signing failed: {e}"),
370            )
371        })?;
372        request.insert_header(HeaderName::from_static("authorization"), auth);
373
374        forward_to_next(ctx, request, next, "shared-key").await
375    }
376}
377
378/// Hand the request to the next policy in the chain, returning a clear
379/// error if the chain was empty (the SDK always installs at least the
380/// transport policy as the tail, so an empty chain only fires when the
381/// signing policy is wired wrong).
382async fn forward_to_next(
383    ctx: &Context<'_>,
384    request: &mut Request,
385    next: &[Arc<dyn Policy>],
386    policy_name: &'static str,
387) -> PolicyResult {
388    match next.first() {
389        Some(p) => p.send(ctx, request, &next[1..]).await,
390        None => Err(azure_core::Error::with_message(
391            azure_core::error::ErrorKind::Other,
392            format!("{policy_name} policy installed without a downstream policy"),
393        )),
394    }
395}
396
397/// Pull `Content-Length` from the request, falling back to the body
398/// length if the header is not yet stamped. Returns `None` for empty
399/// bodies (the spec says omit the value from the string-to-sign).
400fn request_content_length(request: &Request) -> Option<u64> {
401    if let Some(s) = request
402        .headers()
403        .get_optional_str(&HeaderName::from_static("content-length"))
404        && let Ok(n) = s.parse::<u64>()
405    {
406        return if n == 0 { None } else { Some(n) };
407    }
408    match request.body().len() {
409        Some(0) | None => None,
410        Some(n) => Some(n),
411    }
412}
413
414/// Compute the `Authorization: SharedKey <account>:<sig>` header value.
415///
416/// Exposed as `pub` so the Azurite integration test (in a separate
417/// crate) can sign its own container-create setup request. There is
418/// no production caller outside this crate; the function is small,
419/// pure, and stable enough that re-using it in tests is preferable
420/// to duplicating the spec-exact canonicalisation logic.
421///
422/// The `key` argument is a pre-decoded [`HmacKey`]; callers that
423/// have only a base64 string call [`HmacKey::from_base64`] once and
424/// reuse the resulting [`HmacKey`] across signs.
425///
426/// # Errors
427///
428/// Returns `Err(String)` if HMAC initialisation fails.
429pub fn compute_authorization(
430    account: &str,
431    key: &HmacKey,
432    method: Method,
433    url: &Url,
434    headers: &Headers,
435    content_length: Option<u64>,
436) -> Result<String, String> {
437    let canon_resource = canonicalized_resource(account, url);
438    let canon_headers = canonicalized_headers(headers);
439    let string_to_sign = string_to_sign(
440        method,
441        headers,
442        content_length,
443        &canon_headers,
444        &canon_resource,
445    );
446    let sig = hmac_sha256_base64(&string_to_sign, key)?;
447    Ok(format!("SharedKey {account}:{sig}"))
448}
449
450/// Look up a header value for inclusion in the string-to-sign,
451/// applying the same trim + unfold-newlines transform that
452/// [`canonicalized_headers`] applies to `x-ms-*` headers. Both
453/// sites participate in the same string-to-sign and must use the
454/// same sanitisation — a literal `\n` in any of these header
455/// values would shift fields downstream and let a malformed
456/// request masquerade as a different signed request. In practice
457/// the HTTP stack rejects header values containing newlines, so
458/// this is theoretical; the consistency is the point.
459fn header_str<'a>(headers: &'a Headers, name: &'static str) -> Cow<'a, str> {
460    let raw = headers
461        .get_optional_str(&HeaderName::from_static(name))
462        .unwrap_or("");
463    let trimmed = raw.trim();
464    if trimmed.contains('\n') {
465        Cow::Owned(trimmed.replace('\n', " "))
466    } else {
467        Cow::Borrowed(trimmed)
468    }
469}
470
471/// Build the Azure shared-key v2 string-to-sign.
472fn string_to_sign(
473    method: Method,
474    headers: &Headers,
475    content_length: Option<u64>,
476    canon_headers: &str,
477    canon_resource: &str,
478) -> String {
479    let cl = content_length.map(|n| n.to_string()).unwrap_or_default();
480    format!(
481        "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}{}",
482        method.as_ref(),
483        header_str(headers, "content-encoding"),
484        header_str(headers, "content-language"),
485        cl,
486        header_str(headers, "content-md5"),
487        header_str(headers, "content-type"),
488        // `Date` is omitted — `x-ms-date` (in canon_headers) takes
489        // precedence per the Azure spec.
490        "",
491        header_str(headers, "if-modified-since"),
492        header_str(headers, "if-match"),
493        header_str(headers, "if-none-match"),
494        header_str(headers, "if-unmodified-since"),
495        header_str(headers, "range"),
496        canon_headers,
497        canon_resource,
498    )
499}
500
501/// Build the `CanonicalizedHeaders` string per the Azure spec.
502fn canonicalized_headers(headers: &Headers) -> String {
503    let mut sorted: BTreeMap<String, String> = BTreeMap::new();
504    for (name, value) in headers.iter() {
505        let name = name.as_str().to_ascii_lowercase();
506        if !name.starts_with("x-ms-") {
507            continue;
508        }
509        // The spec requires unfolding embedded newlines into single
510        // spaces, but the `\n` case is rare — avoid the unconditional
511        // allocation that `str::replace` performs.
512        let trimmed = value.as_str().trim();
513        let value: Cow<'_, str> = if trimmed.contains('\n') {
514            Cow::Owned(trimmed.replace('\n', " "))
515        } else {
516            Cow::Borrowed(trimmed)
517        };
518        sorted
519            .entry(name)
520            .and_modify(|existing| {
521                existing.push(',');
522                existing.push_str(&value);
523            })
524            .or_insert_with(|| value.into_owned());
525    }
526    let mut out = String::new();
527    for (name, value) in sorted {
528        out.push_str(&name);
529        out.push(':');
530        out.push_str(&value);
531        out.push('\n');
532    }
533    out
534}
535
536/// Build the `CanonicalizedResource` string per the Azure spec.
537fn canonicalized_resource(account: &str, url: &Url) -> String {
538    let mut out = format!("/{account}");
539    let path = url.path();
540    if !path.starts_with('/') {
541        out.push('/');
542    }
543    out.push_str(path);
544
545    let mut grouped: BTreeMap<String, Vec<String>> = BTreeMap::new();
546    for (k, v) in url.query_pairs() {
547        let key = k.to_ascii_lowercase();
548        grouped.entry(key).or_default().push(v.into_owned());
549    }
550    for (name, mut values) in grouped {
551        values.sort_unstable();
552        out.push('\n');
553        out.push_str(&name);
554        out.push(':');
555        for (i, v) in values.iter().enumerate() {
556            if i > 0 {
557                out.push(',');
558            }
559            out.push_str(v);
560        }
561    }
562    out
563}
564
565/// HMAC-SHA256 over `data` with a pre-decoded [`HmacKey`], returning
566/// a base64-encoded MAC. Used by both the shared-key signing policy
567/// (`Authorization: SharedKey …` header) and the service-blob SAS
568/// builder ([`super::sas`]) — same primitive, same byte sequence,
569/// same error wording. The base64 decode of the storage key happens
570/// once at [`HmacKey::from_base64`] time; this function does no
571/// decoding.
572pub(super) fn hmac_sha256_base64(data: &str, key: &HmacKey) -> Result<String, String> {
573    let mut mac = <Hmac<Sha256> as Mac>::new_from_slice(key.as_bytes())
574        .map_err(|e| format!("HMAC init: {e}"))?;
575    mac.update(data.as_bytes());
576    Ok(BASE64.encode(mac.finalize().into_bytes()))
577}
578
579// ---------------------------------------------------------------------------
580// SasSigningPolicy
581// ---------------------------------------------------------------------------
582
583/// Per-try policy that appends SAS query parameters to every
584/// outgoing request URL.
585#[derive(Debug)]
586pub(crate) struct SasSigningPolicy {
587    pairs: Vec<(String, String)>,
588}
589
590impl SasSigningPolicy {
591    pub(crate) fn new(sas: &str) -> Result<Self, ObjectStoreError> {
592        let trimmed = sas.trim().trim_start_matches('?');
593        if trimmed.is_empty() {
594            return Err(ObjectStoreError::Other("SAS token is empty".into()));
595        }
596        let parsed = Url::parse(&format!("https://example.invalid/?{trimmed}"))
597            .map_err(|e| ObjectStoreError::Other(format!("malformed SAS token: {e}").into()))?;
598        let pairs: Vec<(String, String)> = parsed
599            .query_pairs()
600            .map(|(k, v)| (k.into_owned(), v.into_owned()))
601            .collect();
602        if pairs.is_empty() {
603            return Err(ObjectStoreError::Other(
604                "SAS token has no query parameters".into(),
605            ));
606        }
607        Ok(Self { pairs })
608    }
609}
610
611#[async_trait]
612impl Policy for SasSigningPolicy {
613    async fn send(
614        &self,
615        ctx: &Context,
616        request: &mut Request,
617        next: &[Arc<dyn Policy>],
618    ) -> PolicyResult {
619        let url = request.url_mut();
620        let sas_keys: std::collections::HashSet<&str> =
621            self.pairs.iter().map(|(k, _)| k.as_str()).collect();
622        let preserved: Vec<(String, String)> = url
623            .query_pairs()
624            .filter_map(|(k, v)| {
625                if sas_keys.contains(k.as_ref()) {
626                    None
627                } else {
628                    Some((k.into_owned(), v.into_owned()))
629                }
630            })
631            .collect();
632        url.set_query(None);
633        {
634            let mut q = url.query_pairs_mut();
635            for (k, v) in &preserved {
636                q.append_pair(k, v);
637            }
638            for (k, v) in &self.pairs {
639                q.append_pair(k, v);
640            }
641        }
642
643        forward_to_next(ctx, request, next, "SAS").await
644    }
645}
646
647#[cfg(test)]
648mod tests {
649    use super::*;
650
651    // --- is_valid_alias / parse_connection_string ---------------------
652
653    #[test]
654    fn alias_charset() {
655        assert!(is_valid_alias("PROD"));
656        assert!(is_valid_alias("dev_1"));
657        assert!(!is_valid_alias(""));
658        assert!(!is_valid_alias("has-dash"));
659        assert!(!is_valid_alias("has space"));
660        assert!(!is_valid_alias(&"a".repeat(65)));
661    }
662
663    #[test]
664    fn parse_connection_string_extracts_account_and_key() {
665        let s = "DefaultEndpointsProtocol=http;\
666                 AccountName=devstoreaccount1;\
667                 AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;\
668                 BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;";
669        let parts = parse_connection_string(s).expect("parses");
670        assert_eq!(parts.account, "devstoreaccount1");
671        assert!(parts.key_b64.starts_with("Eby8vdM"));
672    }
673
674    #[test]
675    fn parse_connection_string_requires_account_name() {
676        let s = "AccountKey=abc==;BlobEndpoint=http://x/";
677        let err = parse_connection_string(s).unwrap_err();
678        assert!(err.to_string().contains("AccountName"), "{err}");
679    }
680
681    #[test]
682    fn parse_connection_string_requires_account_key() {
683        let s = "AccountName=acct;BlobEndpoint=http://x/";
684        let err = parse_connection_string(s).unwrap_err();
685        assert!(err.to_string().contains("AccountKey"), "{err}");
686    }
687
688    #[test]
689    fn parse_connection_string_ignores_blank_segments() {
690        let s = ";;AccountName=acct;;AccountKey=YWJj;;";
691        let parts = parse_connection_string(s).expect("parses");
692        assert_eq!(parts.account, "acct");
693        assert_eq!(parts.key_b64, "YWJj");
694    }
695
696    #[test]
697    fn parse_connection_string_rejects_segment_without_equals() {
698        let s = "AccountName=acct;malformed;AccountKey=YWJj";
699        let err = parse_connection_string(s).unwrap_err();
700        assert!(
701            err.to_string().contains("malformed"),
702            "error names the bad segment: {err}"
703        );
704    }
705
706    // --- canonicalized_resource ---------------------------------------
707
708    #[test]
709    fn canon_resource_path_only() {
710        let url = Url::parse("https://acct.blob.core.windows.net/container/blob").unwrap();
711        let out = canonicalized_resource("acct", &url);
712        assert_eq!(out, "/acct/container/blob");
713    }
714
715    #[test]
716    fn canon_resource_with_query_params_sorts_and_lowercases() {
717        let url = Url::parse(
718            "https://acct.blob.core.windows.net/c/b?Restype=container&comp=list&PREFIX=p",
719        )
720        .unwrap();
721        let out = canonicalized_resource("acct", &url);
722        assert_eq!(out, "/acct/c/b\ncomp:list\nprefix:p\nrestype:container");
723    }
724
725    #[test]
726    fn canon_resource_groups_duplicate_keys() {
727        let url = Url::parse("https://x.blob.core.windows.net/c?inc=a&inc=b").unwrap();
728        let out = canonicalized_resource("x", &url);
729        assert_eq!(out, "/x/c\ninc:a,b");
730    }
731
732    // --- canonicalized_headers ----------------------------------------
733
734    #[test]
735    fn canon_headers_filters_x_ms_only_and_sorts() {
736        let mut headers = Headers::new();
737        headers.insert(HeaderName::from_static("x-ms-version"), "2025-11-05");
738        headers.insert(
739            HeaderName::from_static("x-ms-date"),
740            "Wed, 01 Jan 2025 00:00:00 GMT",
741        );
742        headers.insert(HeaderName::from_static("authorization"), "ignored");
743        headers.insert(
744            HeaderName::from_static("content-type"),
745            "application/octet-stream",
746        );
747        let out = canonicalized_headers(&headers);
748        assert_eq!(
749            out,
750            "x-ms-date:Wed, 01 Jan 2025 00:00:00 GMT\nx-ms-version:2025-11-05\n"
751        );
752    }
753
754    #[test]
755    fn canon_headers_handles_no_x_ms_headers() {
756        let mut headers = Headers::new();
757        headers.insert(HeaderName::from_static("content-type"), "x");
758        assert_eq!(canonicalized_headers(&headers), "");
759    }
760
761    // --- compute_authorization fixed vector ---------------------------
762
763    #[test]
764    fn compute_authorization_matches_known_vector() {
765        // Hand-built fixture: GET against a container with the
766        // well-known Azurite key. Exact wire-format isn't easily
767        // verifiable in a unit test against the real service, but we
768        // can ensure the signing function produces a stable,
769        // deterministic value given fixed inputs — locking the
770        // canonicalisation into place so future refactors don't
771        // silently change wire output.
772        let key_b64 = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
773        let key = HmacKey::from_base64(key_b64).expect("valid base64");
774        let url =
775            Url::parse("http://127.0.0.1:10000/devstoreaccount1/c?restype=container&comp=list")
776                .unwrap();
777        let mut headers = Headers::new();
778        headers.insert(
779            HeaderName::from_static("x-ms-date"),
780            "Wed, 01 Jan 2025 00:00:00 GMT",
781        );
782        headers.insert(HeaderName::from_static("x-ms-version"), "2025-11-05");
783
784        let auth =
785            compute_authorization("devstoreaccount1", &key, Method::Get, &url, &headers, None)
786                .expect("signs");
787        assert!(auth.starts_with("SharedKey devstoreaccount1:"));
788        let sig = auth.strip_prefix("SharedKey devstoreaccount1:").unwrap();
789        // HMAC-SHA256 → 32 bytes → 44 chars base64.
790        assert_eq!(sig.len(), 44, "unexpected sig length: `{sig}`");
791    }
792
    // --- HmacKey / SasSigningKey, then X_MS_DATE_FORMAT ---------------
794
795    /// F-003: signing through a pre-decoded [`HmacKey`] must produce
796    /// the canonical Azure shared-key v2 signature for the fixed
797    /// inputs below. The pinned wire-format byte sequence is the
798    /// real assertion — a future refactor that silently swaps the
799    /// decoder, transforms the key bytes, or alters the
800    /// string-to-sign layout will flip the signature and trip the
801    /// equality check.
802    #[test]
803    fn hmac_key_signs_canonical_shared_key_v2_signature() {
804        let key_b64 = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
805        let url =
806            Url::parse("http://127.0.0.1:10000/devstoreaccount1/c?restype=container&comp=list")
807                .unwrap();
808        let mut headers = Headers::new();
809        headers.insert(
810            HeaderName::from_static("x-ms-date"),
811            "Wed, 01 Jan 2025 00:00:00 GMT",
812        );
813        headers.insert(HeaderName::from_static("x-ms-version"), "2025-11-05");
814
815        let key = HmacKey::from_base64(key_b64).expect("valid base64");
816        let auth =
817            compute_authorization("devstoreaccount1", &key, Method::Get, &url, &headers, None)
818                .expect("signs");
819        // Pinned wire-format vector — computed once from the inputs
820        // above against the Azure shared-key v2 string-to-sign
821        // layout. (Round-tripping two independently-constructed
822        // `HmacKey`s would only verify that base64 decode is
823        // deterministic — already guaranteed by the standard.)
824        assert_eq!(
825            auth, "SharedKey devstoreaccount1:VgcoAvg+vqaLJ76WpTkj7NrIj4dwCiYGPiMhJ7Q/2zI=",
826            "signature must match the pinned wire-format vector",
827        );
828    }
829
830    /// F-003: the derived `Debug` on [`SasSigningKey`] must inherit
831    /// [`HmacKey`]'s redaction — switching from a manual impl to
832    /// `derive(Debug)` relies on the inner field's `Debug` doing the
833    /// right thing, so pin it here.
834    #[test]
835    fn sas_signing_key_debug_does_not_leak_inner_bytes() {
836        let key_b64 = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
837        let signing = SasSigningKey {
838            account: "devstoreaccount1".to_owned(),
839            key: HmacKey::from_base64(key_b64).expect("valid base64"),
840        };
841        let rendered = format!("{signing:?}");
842        assert!(
843            rendered.contains("redacted"),
844            "Debug must redact via inner HmacKey: {rendered}"
845        );
846        assert!(
847            !rendered.contains("bytes: ["),
848            "Debug must not leak raw key bytes: {rendered}"
849        );
850    }
851
852    /// F-003: `Debug` on [`HmacKey`] must not leak the underlying
853    /// key bytes — any rendering that includes the raw `Vec<u8>`
854    /// would surface the key in tracing output.
855    #[test]
856    fn hmac_key_debug_does_not_leak_bytes() {
857        let key_b64 = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
858        let key = HmacKey::from_base64(key_b64).expect("valid base64");
859        let rendered = format!("{key:?}");
860        assert!(
861            rendered.contains("redacted"),
862            "Debug must redact: {rendered}"
863        );
864        // A leaky derive would render the raw `Vec<u8>` as
865        // `[0x12, 0x34, ...]`. Reject any byte-array delimiters
866        // appearing alongside the bytes field name.
867        assert!(
868            !rendered.contains("bytes: ["),
869            "Debug output must not include raw key bytes: {rendered}"
870        );
871    }
872
873    #[test]
874    fn x_ms_date_format_matches_rfc1123_literal() {
875        // Canonical RFC 1123 / Azure example, also used as the IMF-fixdate
876        // sample in RFC 7231: `Sun, 06 Nov 1994 08:49:37 GMT`.
877        // Unix timestamp 784111777 is 1994-11-06T08:49:37Z.
878        let when = OffsetDateTime::from_unix_timestamp(784_111_777).expect("valid timestamp");
879        let formatted = when.format(&X_MS_DATE_FORMAT).expect("formats");
880        assert_eq!(formatted, "Sun, 06 Nov 1994 08:49:37 GMT");
881    }
882
883    #[test]
884    fn x_ms_date_format_zero_pads_single_digit_fields() {
885        // Guards against a future regression that drops zero-padding on
886        // day, hour, minute, or second — Azure rejects unpadded values.
887        // 2025-01-02T03:04:05Z → all single-digit components.
888        let when = OffsetDateTime::from_unix_timestamp(1_735_787_045).expect("valid timestamp");
889        let formatted = when.format(&X_MS_DATE_FORMAT).expect("formats");
890        assert_eq!(formatted, "Thu, 02 Jan 2025 03:04:05 GMT");
891    }
892
893    // --- SasSigningPolicy --------------------------------------------
894
895    #[test]
896    fn sas_policy_rejects_empty() {
897        assert!(SasSigningPolicy::new("").is_err());
898        assert!(SasSigningPolicy::new("?").is_err());
899        assert!(SasSigningPolicy::new("   ").is_err());
900    }
901
902    // --- lookup_env ---------------------------------------------------
903
904    #[test]
905    fn lookup_env_returns_none_when_unset() {
906        // Use a var name that no other test touches so parallel test
907        // runs do not race. `lookup_env` does no caching, so reading
908        // a guaranteed-unset name is deterministic.
909        let name = "AZSTORE_AUTH_TEST_DEFINITELY_UNSET_VAR";
910        let _env = crate::test_util::EnvGuard::unset(name);
911        assert!(matches!(lookup_env(name), Ok(None)));
912    }
913
914    #[test]
915    fn lookup_env_returns_value_when_valid_utf8() {
916        let name = "AZSTORE_AUTH_TEST_VALID_UTF8";
917        let _env = crate::test_util::EnvGuard::set(name, "hello");
918        let value = lookup_env(name).expect("UTF-8 value must read");
919        assert_eq!(value.as_deref(), Some("hello"));
920    }
921
922    /// Issue #218: a credential env var set to non-UTF-8 bytes must
923    /// surface as a structured error naming the offending variable,
924    /// not be silently treated as "unset" (which previously sent the
925    /// operator chasing the "no env var set" message).
926    #[cfg(unix)]
927    #[test]
928    fn lookup_env_surfaces_not_unicode_error_naming_var() {
929        use std::ffi::OsString;
930        use std::os::unix::ffi::OsStringExt;
931
932        let name = "AZSTORE_AUTH_TEST_NOT_UNICODE";
933        // 0xFF is never valid in a UTF-8 byte stream, so this is
934        // guaranteed to land in the `VarError::NotUnicode` branch.
935        let bad = OsString::from_vec(vec![0xFF, 0xFE, 0xFD]);
936        let _env = crate::test_util::EnvGuard::set(name, &bad);
937        let err = lookup_env(name).expect_err("non-UTF-8 env value must error, not be ignored");
938        let msg = err.to_string();
939        assert!(
940            msg.contains(name),
941            "error must name the offending var (`{name}`): {msg}"
942        );
943        assert!(
944            msg.contains("not valid UTF-8") || msg.contains("UTF-8"),
945            "error must mention UTF-8: {msg}"
946        );
947    }
948
949    #[test]
950    fn sas_policy_parses_with_or_without_leading_question() {
951        let a = SasSigningPolicy::new("sv=2025&sig=abc").expect("parses");
952        let b = SasSigningPolicy::new("?sv=2025&sig=abc").expect("parses");
953        assert_eq!(a.pairs, b.pairs);
954        assert!(a.pairs.iter().any(|(k, v)| k == "sv" && v == "2025"));
955        assert!(a.pairs.iter().any(|(k, v)| k == "sig" && v == "abc"));
956    }
957}