use serde::{Deserialize, Serialize};
use std::collections::BTreeSet;
/// Text of the default allowlist in TOML form.
///
/// The contents of `../data/network_allowlist.toml` (path relative to this
/// source file) are embedded at compile time via `include_str!`, so no file
/// is read at runtime. [`NetworkAllowlist::load_default`] parses this string.
pub const DEFAULT_ALLOWLIST_TOML: &str = include_str!("../data/network_allowlist.toml");
/// Root document of the network allowlist.
///
/// Every field is `#[serde(default)]`, so any table or array may be missing
/// from the input and deserializes to its empty/`None` value. Field order is
/// significant for serialized output and must not be rearranged casually.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct NetworkAllowlist {
    /// Optional descriptive metadata about the allowlist itself.
    #[serde(default)]
    pub meta: Option<AllowlistMeta>,
    /// AI provider endpoints, split into `cloud` and `local` groups.
    #[serde(default)]
    pub ai_providers: AiProviderCategories,
    /// Search endpoints, split into `web` and `specialized` groups.
    #[serde(default)]
    pub search: SearchCategories,
    /// Entries under the `web_crawl` key.
    // The serialized key equals the field name, so no `rename` is needed.
    #[serde(default)]
    pub web_crawl: Vec<AllowlistEntry>,
    /// Entries under the `mcp_servers` key.
    #[serde(default)]
    pub mcp_servers: Vec<AllowlistEntry>,
    /// Entries under the `package_registries` key.
    #[serde(default)]
    pub package_registries: Vec<AllowlistEntry>,
    /// Entries under the `code_hosting` key.
    #[serde(default)]
    pub code_hosting: Vec<AllowlistEntry>,
    /// Entries under the `auth` key.
    #[serde(default)]
    pub auth: Vec<AllowlistEntry>,
    /// Entries under the `dev_infra` key.
    #[serde(default)]
    pub dev_infra: Vec<AllowlistEntry>,
    /// Entries under the `os_updates` key.
    #[serde(default)]
    pub os_updates: Vec<AllowlistEntry>,
}
/// Optional descriptive metadata carried alongside the allowlist data.
///
/// Every field is an optional free-form string; nothing visible in this file
/// parses, validates, or otherwise interprets these values.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct AllowlistMeta {
/// Version label of the allowlist data, if provided.
#[serde(default)]
pub version: Option<String>,
/// When the data was last updated, if provided (kept as a plain string; no date format is enforced here).
#[serde(default)]
pub last_updated: Option<String>,
/// Maintainer of the data, if provided.
#[serde(default)]
pub maintainer: Option<String>,
/// Repository reference for the data, if provided.
#[serde(default)]
pub repo: Option<String>,
/// Human-readable description of the allowlist, if provided.
#[serde(default)]
pub description: Option<String>,
}
/// AI provider entries, grouped into cloud and local providers.
///
/// The two groups use different entry types: `cloud` uses the domain-based
/// [`AllowlistEntry`], while `local` uses the host/port-based
/// [`LocalAiProviderEntry`].
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct AiProviderCategories {
/// Cloud providers; these are the first entries yielded by `NetworkAllowlist::iter_entries`.
#[serde(default)]
pub cloud: Vec<AllowlistEntry>,
/// Local providers; not part of `NetworkAllowlist::iter_entries`, so they are
/// absent from the domain lists and from `entry_count`, but are counted by `category_summary`.
#[serde(default)]
pub local: Vec<LocalAiProviderEntry>,
}
/// Search-related entries, grouped into `web` and `specialized`.
///
/// Both groups are included in `NetworkAllowlist::iter_entries` and in
/// `NetworkAllowlist::web_fetch_relevant_domains`, `web` first.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct SearchCategories {
/// Entries under the `search.web` key.
#[serde(default)]
pub web: Vec<AllowlistEntry>,
/// Entries under the `search.specialized` key.
#[serde(default)]
pub specialized: Vec<AllowlistEntry>,
}
/// A single domain-based allowlist entry.
///
/// All fields are optional on input: missing strings deserialize to `None`
/// and a missing `verify` deserializes to `false`.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct AllowlistEntry {
/// Display name of the entry, if provided.
#[serde(default)]
pub name: Option<String>,
/// Domain of the entry. The domain-list helpers on `NetworkAllowlist` trim
/// surrounding whitespace and skip entries where this is `None` or blank.
/// Values are otherwise kept verbatim (the tests expect wildcard forms such as `*.auth0.com` to survive).
#[serde(default)]
pub domain: Option<String>,
/// Path associated with the entry, if provided. Not read by any code in this file.
#[serde(default)]
pub path: Option<String>,
/// Protocol associated with the entry, if provided. Not read by any code in this file.
#[serde(default)]
pub protocol: Option<String>,
/// Free-form notes, if provided.
#[serde(default)]
pub notes: Option<String>,
/// When `true`, the entry is returned by `NetworkAllowlist::unverified_entries`,
/// i.e. the flag marks an entry as still needing verification.
#[serde(default)]
pub verify: bool,
}
/// A local AI provider entry, addressed by host and port rather than by domain.
///
/// All fields are optional on input and deserialize to `None` when missing.
/// Within this file these entries are only counted (see
/// `NetworkAllowlist::category_summary`); their fields are not otherwise read.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalAiProviderEntry {
/// Display name of the provider, if provided.
#[serde(default)]
pub name: Option<String>,
/// Host of the provider, if provided.
#[serde(default)]
pub host: Option<String>,
/// Port of the provider, if provided; the `u16` type rejects values outside 0..=65535 at parse time.
#[serde(default)]
pub port: Option<u16>,
/// Protocol of the provider, if provided.
#[serde(default)]
pub protocol: Option<String>,
/// Free-form notes, if provided.
#[serde(default)]
pub notes: Option<String>,
}
impl NetworkAllowlist {
    /// Domain that [`Self::web_fetch_relevant_domains`] always leaves out.
    // NOTE(review): the reason for the exclusion is not visible in this file —
    // presumably this host is handled separately by the web-fetch path; confirm.
    const WEB_FETCH_EXCLUDED_DOMAIN: &'static str = "defuddle.md";

    /// Parses the embedded [`DEFAULT_ALLOWLIST_TOML`] into an allowlist.
    ///
    /// This is deliberately infallible: if the embedded TOML fails to parse,
    /// the error is discarded and an empty (`Default`) allowlist is returned.
    pub fn load_default() -> Self {
        toml::from_str(DEFAULT_ALLOWLIST_TOML).unwrap_or_default()
    }

    /// Returns every distinct, non-blank domain from [`Self::iter_entries`].
    ///
    /// Domains are trimmed of surrounding whitespace and de-duplicated by exact
    /// string match; the first occurrence wins and determines the position in
    /// the result.
    pub fn all_allow_domains(&self) -> Vec<String> {
        Self::unique_domains(self.iter_entries(), None)
    }

    /// Same domains as [`Self::all_allow_domains`], collected into a sorted set.
    pub fn all_allow_domains_set(&self) -> BTreeSet<String> {
        self.all_allow_domains().into_iter().collect()
    }

    /// Returns the distinct, non-blank domains of the categories used for web
    /// fetching, in first-seen order.
    ///
    /// Categories are visited in this order: `search.web`,
    /// `search.specialized`, `web_crawl`, `mcp_servers`, `package_registries`,
    /// `code_hosting`. The domain `defuddle.md` is always omitted.
    pub fn web_fetch_relevant_domains(&self) -> Vec<String> {
        let entries = self
            .search
            .web
            .iter()
            .chain(self.search.specialized.iter())
            .chain(self.web_crawl.iter())
            .chain(self.mcp_servers.iter())
            .chain(self.package_registries.iter())
            .chain(self.code_hosting.iter());
        Self::unique_domains(entries, Some(Self::WEB_FETCH_EXCLUDED_DOMAIN))
    }

    /// Returns the entries whose `verify` flag is set, i.e. those marked as
    /// still needing verification, in [`Self::iter_entries`] order.
    pub fn unverified_entries(&self) -> Vec<&AllowlistEntry> {
        self.iter_entries().filter(|e| e.verify).collect()
    }

    /// Iterates over every domain-based entry, category by category.
    ///
    /// Order: `ai_providers.cloud`, `search.web`, `search.specialized`,
    /// `web_crawl`, `mcp_servers`, `package_registries`, `code_hosting`,
    /// `auth`, `dev_infra`, `os_updates`. `ai_providers.local` is not included
    /// because it holds a different entry type.
    pub fn iter_entries(&self) -> impl Iterator<Item = &AllowlistEntry> {
        self.ai_providers
            .cloud
            .iter()
            .chain(self.search.web.iter())
            .chain(self.search.specialized.iter())
            .chain(self.web_crawl.iter())
            .chain(self.mcp_servers.iter())
            .chain(self.package_registries.iter())
            .chain(self.code_hosting.iter())
            .chain(self.auth.iter())
            .chain(self.dev_infra.iter())
            .chain(self.os_updates.iter())
    }

    /// Number of entries yielded by [`Self::iter_entries`] (local AI providers
    /// are therefore not counted).
    pub fn entry_count(&self) -> usize {
        self.iter_entries().count()
    }

    /// Builds a one-line summary such as `"search.web: 3, auth: 2"`.
    ///
    /// Each non-empty category contributes `"<key>: <count>"`; parts are joined
    /// with `", "`. Empty categories are skipped, so an empty allowlist yields
    /// an empty string. Unlike [`Self::entry_count`], this includes
    /// `ai_providers.local`.
    pub fn category_summary(&self) -> String {
        // Label/count pairs in the exact order they appear in the summary.
        let counts = [
            ("ai_providers.cloud", self.ai_providers.cloud.len()),
            ("ai_providers.local", self.ai_providers.local.len()),
            ("search.web", self.search.web.len()),
            ("search.specialized", self.search.specialized.len()),
            ("web_crawl", self.web_crawl.len()),
            ("mcp_servers", self.mcp_servers.len()),
            ("package_registries", self.package_registries.len()),
            ("code_hosting", self.code_hosting.len()),
            ("auth", self.auth.len()),
            ("dev_infra", self.dev_infra.len()),
            ("os_updates", self.os_updates.len()),
        ];
        counts
            .iter()
            .filter(|(_, count)| *count > 0)
            .map(|(label, count)| format!("{label}: {count}"))
            .collect::<Vec<_>>()
            .join(", ")
    }

    /// Collects trimmed, non-blank, distinct domains from `entries` in
    /// first-seen order, skipping `excluded` when one is given.
    fn unique_domains<'a>(
        entries: impl Iterator<Item = &'a AllowlistEntry>,
        excluded: Option<&str>,
    ) -> Vec<String> {
        // `seen` borrows from the entries and gives O(log n) membership tests;
        // `out` preserves insertion order for the caller.
        let mut seen: BTreeSet<&'a str> = BTreeSet::new();
        let mut out = Vec::new();
        for domain in entries.filter_map(|entry| entry.domain.as_deref()) {
            let domain = domain.trim();
            if domain.is_empty() || excluded == Some(domain) {
                continue;
            }
            // `insert` returns true only the first time a value is added.
            if seen.insert(domain) {
                out.push(domain.to_string());
            }
        }
        out
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an entry that only has a domain set.
    fn entry_for(domain: &str) -> AllowlistEntry {
        AllowlistEntry {
            domain: Some(domain.to_string()),
            ..Default::default()
        }
    }

    /// The embedded TOML parses and yields a plausible number of entries.
    #[test]
    fn load_default_parses_embedded_toml() {
        let list = NetworkAllowlist::load_default();
        assert!(
            list.entry_count() > 50,
            "expected many entries, got {}",
            list.entry_count()
        );
        assert!(list.entry_count() <= 200, "allowlist grew unexpectedly");
    }

    /// A handful of well-known hosts must be present in the default data.
    #[test]
    fn load_default_includes_common_dev_hosts() {
        let list = NetworkAllowlist::load_default();
        let domains = list.all_allow_domains_set();
        for host in [
            "github.com",
            "api.github.com",
            "crates.io",
            "registry.npmjs.org",
            "pypi.org",
            "defuddle.md",
            "r.jina.ai",
            "api.tavily.com",
            "api.anthropic.com",
        ] {
            assert!(
                domains.contains(host),
                "default allowlist should include {host}; missing"
            );
        }
    }

    /// Wildcard domains are passed through verbatim, not expanded or dropped.
    #[test]
    fn load_default_preserves_wildcards() {
        let list = NetworkAllowlist::load_default();
        let domains = list.all_allow_domains_set();
        for wildcard in ["*.auth0.com", "*.workers.dev", "*.vercel.app"] {
            assert!(
                domains.contains(wildcard),
                "default allowlist should include wildcard {wildcard}"
            );
        }
    }

    /// Entries with `verify = true` are reported by `unverified_entries`.
    #[test]
    fn load_default_flags_unverified_entries() {
        let list = NetworkAllowlist::load_default();
        let unverified: Vec<&str> = list
            .unverified_entries()
            .iter()
            .filter_map(|e| e.name.as_deref())
            .collect();
        assert!(
            unverified.iter().any(|n| n.contains("MiMo")),
            "expected MiMo to be flagged verify=true; got {unverified:?}"
        );
    }

    /// The summary names the categories that the default data populates.
    #[test]
    fn category_summary_lists_populated_categories() {
        let list = NetworkAllowlist::load_default();
        let summary = list.category_summary();
        assert!(summary.contains("ai_providers.cloud"));
        assert!(summary.contains("search.web"));
        assert!(summary.contains("code_hosting"));
    }

    /// Hermetic check: domains are trimmed, blanks and missing domains are
    /// skipped, duplicates collapse, and first-seen category order is kept.
    #[test]
    fn all_allow_domains_trims_and_deduplicates_in_order() {
        let list = NetworkAllowlist {
            web_crawl: vec![entry_for(" b.example "), entry_for("a.example")],
            auth: vec![
                entry_for("b.example"),
                entry_for("   "),
                AllowlistEntry::default(),
            ],
            ..Default::default()
        };
        assert_eq!(list.all_allow_domains(), vec!["b.example", "a.example"]);
        assert_eq!(list.entry_count(), 5);
    }

    /// Hermetic check: `defuddle.md` and categories outside the web-fetch set
    /// (here `auth`) never appear in `web_fetch_relevant_domains`.
    #[test]
    fn web_fetch_relevant_domains_skips_defuddle_and_other_categories() {
        let list = NetworkAllowlist {
            web_crawl: vec![entry_for("defuddle.md"), entry_for("r.jina.ai")],
            code_hosting: vec![entry_for("github.com"), entry_for("r.jina.ai")],
            auth: vec![entry_for("login.example")],
            ..Default::default()
        };
        assert_eq!(
            list.web_fetch_relevant_domains(),
            vec!["r.jina.ai", "github.com"]
        );
    }

    /// Hermetic check: an empty allowlist summarizes to an empty string and
    /// populated categories are rendered as `key: count` joined by `", "`.
    #[test]
    fn category_summary_skips_empty_categories() {
        assert_eq!(NetworkAllowlist::default().category_summary(), "");
        let list = NetworkAllowlist {
            web_crawl: vec![entry_for("a.example")],
            auth: vec![entry_for("b.example"), entry_for("c.example")],
            ..Default::default()
        };
        assert_eq!(list.category_summary(), "web_crawl: 1, auth: 2");
    }
}