Skip to main content

eure/query/
asset_locator.rs

1//! Asset locator with URL host validation.
2//!
3//! Provides URL host validation against an allowlist:
4//! - `eure.dev` is always allowed (default trusted host)
5//! - Additional hosts can be configured via `@ security.allowed-hosts` in Eure.eure
6
7use query_flow::{Db, LocateResult, QueryError, asset_locator};
8use url::Url;
9
10use crate::query::config::WorkspaceConfig;
11
12use super::assets::{TextFile, TextFileContent, WorkspaceId};
13use super::error::EureQueryError;
14
15/// The default allowed host (always trusted).
16const DEFAULT_ALLOWED_HOST: &str = "eure.dev";
17
18/// Asset locator that validates URL hosts before allowing fetches.
19///
20/// This locator:
21/// 1. Validates remote URLs against the host allowlist
22/// 2. Returns `Pending` for allowed URLs (to be fetched by platform)
23/// 3. Returns `UserError` for disallowed hosts
24///
25/// Local files are always allowed (no validation needed).
26///
27/// # Allowlist Resolution
28/// 1. `eure.dev` and `*.eure.dev` are always allowed
29/// 2. Additional hosts come from `@ security.allowed-hosts` in workspace config
30/// 3. If no config is available, only `eure.dev` is allowed
31#[asset_locator]
32pub fn text_file_locator(
33    db: &impl Db,
34    key: &TextFile,
35) -> Result<LocateResult<TextFileContent>, QueryError> {
36    match key {
37        TextFile::Local(_) => {
38            // Local files are always allowed
39            Ok(LocateResult::Pending)
40        }
41        TextFile::Remote(url) => {
42            // Validate remote URL host
43            validate_url_host(db, url)?;
44            // Host is allowed - let platform fetch
45            Ok(LocateResult::Pending)
46        }
47    }
48}
49
50/// Validate that a URL's host is in the allowlist.
51///
52/// Returns `Ok(())` if the host is allowed, or `Err(QueryError::UserError)` if not.
53fn validate_url_host(db: &impl Db, url: &Url) -> Result<(), QueryError> {
54    let host = url.host_str().unwrap_or("");
55
56    // eure.dev is always allowed (including subdomains)
57    if host == DEFAULT_ALLOWED_HOST {
58        return Ok(());
59    }
60
61    // Try to get config from workspace
62    let allowed_hosts = get_allowed_hosts_from_workspace(db)?;
63
64    // Check if host is in allowlist
65    if allowed_hosts
66        .iter()
67        .any(|allowed| host_matches(host, allowed))
68    {
69        return Ok(());
70    }
71
72    // Host not allowed
73    Err(EureQueryError::HostNotAllowed {
74        url: url.clone(),
75        host: host.to_string(),
76    }
77    .into())
78}
79
/// Test whether `host` is covered by a single allowlist entry.
///
/// Two entry forms are understood:
/// - Plain host, e.g. "example.com": `host` must be identical to it.
/// - Wildcard, e.g. "*.example.com": accepts the bare domain ("example.com")
///   as well as any subdomain ("sub.example.com", "a.b.example.com").
///
/// The comparison is case-sensitive.
fn host_matches(host: &str, pattern: &str) -> bool {
    match pattern.strip_prefix("*.") {
        // Plain entry: only the identical host qualifies
        None => host == pattern,
        // Wildcard entry: `host` must end with the domain, and whatever precedes
        // it must be empty (bare domain) or end with a dot (subdomain), so that
        // "badexample.com" is not mistaken for a subdomain of "example.com".
        Some(domain) => match host.strip_suffix(domain) {
            Some(labels) => labels.is_empty() || labels.ends_with('.'),
            None => false,
        },
    }
}
94
95/// Get allowed hosts from all workspace configs.
96///
97/// Aggregates allowed hosts from all registered workspaces.
98fn get_allowed_hosts_from_workspace(db: &impl Db) -> Result<Vec<String>, QueryError> {
99    let mut allowed_hosts = Vec::new();
100
101    for workspace_id in db.list_asset_keys::<WorkspaceId>() {
102        let config = db.query(WorkspaceConfig::new(workspace_id))?;
103        allowed_hosts.extend(config.config.allowed_hosts().iter().cloned());
104    }
105
106    Ok(allowed_hosts)
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Unit tests for `host_matches`, one test per pattern form.
    mod host_matches_tests {
        use super::*;

        /// Assert that `host_matches` returns `expected` for every
        /// `(host, pattern)` pair, reporting the offending pair on failure.
        fn check_all(expected: bool, cases: &[(&str, &str)]) {
            for &(host, pattern) in cases {
                assert_eq!(
                    host_matches(host, pattern),
                    expected,
                    "host {:?} against pattern {:?}",
                    host,
                    pattern
                );
            }
        }

        #[test]
        fn exact_match() {
            check_all(true, &[("example.com", "example.com")]);
            check_all(
                false,
                &[
                    ("other.com", "example.com"),
                    ("sub.example.com", "example.com"),
                ],
            );
        }

        #[test]
        fn wildcard_match_subdomain() {
            check_all(
                true,
                &[
                    ("sub.example.com", "*.example.com"),
                    ("a.b.example.com", "*.example.com"),
                ],
            );
        }

        #[test]
        fn wildcard_match_base() {
            // A wildcard entry also covers the bare domain itself
            check_all(true, &[("example.com", "*.example.com")]);
        }

        #[test]
        fn wildcard_no_match() {
            check_all(
                false,
                &[
                    ("other.com", "*.example.com"),
                    ("exampleXcom", "*.example.com"),
                ],
            );
        }

        #[test]
        fn empty_pattern() {
            check_all(false, &[("example.com", "")]);
            check_all(true, &[("", "")]);
        }
    }
}