Skip to main content

everruns_core/
system_allowlist.rs

1//! System-wide outbound allowlist ("green list").
2//!
3//! An optional, host-owned global allowlist of well-known public resources
4//! (package registries, source hosting, AI/cloud provider APIs, ...). It is an
5//! internal, curated list — not agent/session/user configuration — shipped as
6//! an embedded TOML (`system_allowlist.toml`) and grouped by category so it
7//! stays manageable.
8//!
9//! When enabled via `EVERRUNS_SYSTEM_ALLOWLIST_ENABLED`, the egress boundary
10//! denies any outbound request whose URL does not match one of the groups, in
11//! addition to (and independently of) the per-agent/session
12//! [`NetworkAccessList`]. It is disabled by default, so the default behavior is
13//! unchanged. See `specs/system-allowlist.md`.
14
15use crate::network_access::NetworkAccessList;
16use serde::Deserialize;
17use std::collections::BTreeMap;
18use std::sync::{Arc, OnceLock};
19
/// Name of the environment variable that switches the global system allowlist
/// on. `SystemAllowlist::from_env` reads it to decide whether enforcement is
/// active.
pub const SYSTEM_ALLOWLIST_ENABLED_ENV: &str = "EVERRUNS_SYSTEM_ALLOWLIST_ENABLED";
22
23/// Embedded TOML source of the curated allowlist.
24const EMBEDDED_TOML: &str = include_str!("system_allowlist.toml");
25
26#[derive(Debug, Clone, Deserialize)]
27struct AllowlistFile {
28    #[serde(default)]
29    groups: BTreeMap<String, GroupSpec>,
30}
31
32#[derive(Debug, Clone, Deserialize)]
33struct GroupSpec {
34    #[serde(default)]
35    description: Option<String>,
36    #[serde(default)]
37    allowed: Vec<String>,
38}
39
/// One named category of the allowlist together with the patterns it permits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AllowGroup {
    /// Group name, taken from the `[groups.<name>]` table key.
    pub name: String,
    /// Optional free-text description of the group.
    pub description: Option<String>,
    /// Patterns contributed by this group.
    pub allowed: Vec<String>,
}
47
48/// Curated, system-wide outbound allowlist.
49///
50/// Matching reuses [`NetworkAccessList`] semantics: the flattened set of group
51/// patterns forms a single non-empty `allowed` list, so only URLs matching at
52/// least one pattern are permitted.
53#[derive(Debug, Clone)]
54pub struct SystemAllowlist {
55    groups: Vec<AllowGroup>,
56    acl: NetworkAccessList,
57}
58
59impl SystemAllowlist {
60    /// Parse a TOML document into a `SystemAllowlist`.
61    pub fn from_toml(source: &str) -> Result<Self, toml::de::Error> {
62        let file: AllowlistFile = toml::from_str(source)?;
63        let mut groups = Vec::with_capacity(file.groups.len());
64        let mut patterns = Vec::new();
65        for (name, spec) in file.groups {
66            patterns.extend(spec.allowed.iter().cloned());
67            groups.push(AllowGroup {
68                name,
69                description: spec.description,
70                allowed: spec.allowed,
71            });
72        }
73        // Fail closed: an allowlist with no patterns must deny everything. An
74        // empty `allowed` list in `NetworkAccessList` means "no restriction"
75        // (allow all), so an empty/misconfigured allowlist would otherwise
76        // silently disable enforcement. Substitute a sentinel that can never
77        // match a real URL, mirroring `merge_network_access`'s `<none>` guard.
78        let acl = if patterns.is_empty() {
79            NetworkAccessList::allow_only(["<none>"])
80        } else {
81            NetworkAccessList::allow_only(patterns)
82        };
83        Ok(Self { groups, acl })
84    }
85
86    /// The curated allowlist embedded in the binary (parsed once and cached).
87    pub fn embedded() -> Arc<SystemAllowlist> {
88        static EMBEDDED: OnceLock<Arc<SystemAllowlist>> = OnceLock::new();
89        EMBEDDED
90            .get_or_init(|| {
91                Arc::new(
92                    SystemAllowlist::from_toml(EMBEDDED_TOML)
93                        .expect("embedded system_allowlist.toml is valid"),
94                )
95            })
96            .clone()
97    }
98
99    /// Resolve the active allowlist from the environment.
100    ///
101    /// Returns `Some(embedded)` when `EVERRUNS_SYSTEM_ALLOWLIST_ENABLED` is
102    /// `true` or `1`, otherwise `None` (no global enforcement).
103    pub fn from_env() -> Option<Arc<SystemAllowlist>> {
104        let enabled = std::env::var(SYSTEM_ALLOWLIST_ENABLED_ENV)
105            .map(|value| value == "true" || value == "1")
106            .unwrap_or(false);
107        enabled.then(SystemAllowlist::embedded)
108    }
109
110    /// Categories in the allowlist.
111    pub fn groups(&self) -> &[AllowGroup] {
112        &self.groups
113    }
114
115    /// Whether the given URL matches any allowed pattern in any group.
116    pub fn is_url_allowed(&self, url: &str) -> bool {
117        self.acl.is_url_allowed(url)
118    }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// The embedded document must parse, define groups, and have no group
    /// without patterns.
    #[test]
    fn embedded_allowlist_parses_and_has_groups() {
        let embedded = SystemAllowlist::embedded();
        let groups = embedded.groups();
        assert!(!groups.is_empty(), "embedded allowlist should define groups");
        // Every group should contribute at least one pattern.
        for AllowGroup { name, allowed, .. } in groups {
            assert!(!allowed.is_empty(), "group {} has no patterns", name);
        }
    }

    /// A sample of well-known registries, APIs and hosts must be allowed.
    #[test]
    fn embedded_allowlist_permits_known_public_resources() {
        const KNOWN_URLS: [&str; 7] = [
            "https://registry.npmjs.org/left-pad",
            "https://static.crates.io/crates/serde/serde-1.0.0.crate",
            "https://files.pythonhosted.org/packages/abc.whl",
            "https://api.openai.com/v1/responses",
            "https://api.anthropic.com/v1/messages",
            "https://codeload.github.com/owner/repo/tar.gz/main",
            "https://ghcr.io/v2/owner/image/manifests/latest",
        ];
        let embedded = SystemAllowlist::embedded();
        for url in KNOWN_URLS {
            let permitted = embedded.is_url_allowed(url);
            assert!(permitted, "should allow {url}");
        }
    }

    /// Hosts that are not on the list must be rejected.
    #[test]
    fn embedded_allowlist_denies_unlisted_hosts() {
        const UNLISTED_URLS: [&str; 3] = [
            "https://evil.example.com/payload",
            "http://169.254.169.254/latest/meta-data/",
            "https://random-blog.net/post",
        ];
        let embedded = SystemAllowlist::embedded();
        for url in UNLISTED_URLS {
            let permitted = embedded.is_url_allowed(url);
            assert!(!permitted, "should deny {url}");
        }
    }

    /// An allowlist without any pattern must deny rather than allow all.
    #[test]
    fn empty_allowlist_fails_closed() {
        // No groups, empty groups, and groups with no patterns must all deny
        // every URL rather than silently allowing all traffic.
        let sources = ["", "[groups.empty]\n", "[groups.empty]\nallowed = []\n"];
        for source in sources {
            let parsed = SystemAllowlist::from_toml(source).expect("valid toml");
            let permitted = parsed.is_url_allowed("https://example.com/");
            assert!(
                !permitted,
                "empty allowlist (source: {source:?}) must deny all URLs"
            );
        }
    }

    /// Patterns from separate groups end up in one combined allowlist.
    #[test]
    fn from_toml_flattens_group_patterns() {
        let source = r#"
            [groups.alpha]
            description = "first"
            allowed = ["*.alpha.test"]

            [groups.beta]
            allowed = ["beta.test"]
            "#;
        let parsed = SystemAllowlist::from_toml(source).expect("valid toml");

        assert_eq!(parsed.groups().len(), 2);
        assert!(parsed.is_url_allowed("https://api.alpha.test/x"));
        assert!(parsed.is_url_allowed("https://beta.test/y"));
        assert!(!parsed.is_url_allowed("https://gamma.test/z"));
    }
}