use cid::Cid;
use serde::{de, Deserialize, Deserializer, Serialize};
use sha2::{Digest, Sha256};
use std::{collections::HashSet, sync::Arc, time::Duration};
use tokio::{sync::RwLock, task::JoinHandle};
use tracing::{debug, log::error};
/// Pause between two consecutive denylist refresh attempts (8 hours).
const BAD_BITS_UPDATE_INTERVAL: Duration = Duration::from_secs(3600 * 8);
/// URL the updater task downloads the JSON denylist from.
const DEFAULT_DENY_LIST_URI: &str = "http://badbits.dwebops.pub/denylist.json";
/// One denylist entry: a 32-byte anchor value.
///
/// Anchors are produced locally by `BadBits::to_anchor` and compared against
/// the entries held in `BadBits::denylist`.
///
/// Only deserialization is customised (hex string -> bytes via `de_from_str`);
/// serialization uses serde's derived behaviour for `[u8; 32]`.
#[derive(Debug, Deserialize, Serialize, Clone, Eq, Hash, PartialEq)]
pub struct BadBitsAnchor {
/// Raw anchor bytes, stored under the key `anchor` in the serialized form.
#[serde(rename = "anchor", deserialize_with = "de_from_str")]
pub value: [u8; 32],
}
fn de_from_str<'de, D>(deserializer: D) -> Result<[u8; 32], D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
let b = hex::decode(s).unwrap();
b[..32].try_into().map_err(de::Error::custom)
}
/// In-memory denylist together with the time it was last replaced.
#[derive(Debug)]
pub struct BadBits {
/// Set when the value is constructed and again on every `update` call.
pub last_updated: time::Instant,
/// Anchors currently considered bad; queried by `is_bad`.
pub denylist: HashSet<BadBitsAnchor>,
}
impl BadBits {
    /// Creates an empty denylist stamped with the current time.
    pub fn new() -> Self {
        let denylist = HashSet::new();
        let last_updated = time::Instant::now();
        Self {
            last_updated,
            denylist,
        }
    }

    /// Replaces the whole denylist with `denylist` and refreshes `last_updated`.
    pub fn update(&mut self, denylist: HashSet<BadBitsAnchor>) {
        *self = Self {
            last_updated: time::Instant::now(),
            denylist,
        };
    }

    /// Returns `true` when the anchor derived from `cid` and `path` is on the
    /// denylist.
    pub fn is_bad(&self, cid: &Cid, path: &str) -> bool {
        let anchor = Self::to_anchor(*cid, path);
        self.denylist.contains(&anchor)
    }

    /// Computes the anchor for a CID/path pair.
    ///
    /// The anchor is the SHA-256 digest of the CID's version-1 string form
    /// followed by `path`; a `/` is inserted between the two when `path` does
    /// not already begin with one (so `""` and `"/"` yield the same anchor).
    ///
    /// # Panics
    ///
    /// Panics if `cid` cannot be converted to a version-1 CID.
    pub fn to_anchor(cid: Cid, path: &str) -> BadBitsAnchor {
        let cid_text = cid.into_v1().unwrap().to_string();
        let separator = if path.starts_with('/') { "" } else { "/" };

        let mut hasher = Sha256::new();
        for part in [cid_text.as_str(), separator, path] {
            hasher.update(part);
        }

        let digest = hasher.finalize();
        let value = <[u8; 32]>::try_from(&digest[..]).unwrap();
        BadBitsAnchor { value }
    }
}
impl Default for BadBits {
/// Same as `BadBits::new`: an empty denylist stamped with the current time.
fn default() -> Self {
Self::new()
}
}
pub fn spawn_bad_bits_updater(bad_bits: Arc<Option<RwLock<BadBits>>>) -> Option<JoinHandle<()>> {
if bad_bits.is_some() {
return Some(tokio::spawn(async move {
let bad_bits = bad_bits.as_ref();
if let Some(bbits) = bad_bits {
loop {
let denylist_uri = DEFAULT_DENY_LIST_URI;
let res = reqwest::get(denylist_uri).await.unwrap();
if res.status().is_success() {
let body = res.bytes().await.unwrap();
if body.len() > 1 << 26 {
error!("denylist too large: {}", body.len());
} else {
let body =
serde_json::from_slice::<Vec<BadBitsAnchor>>(&body[..]).unwrap();
let new_denylist = HashSet::from_iter(body);
bbits.write().await.update(new_denylist);
debug!(
"updated denylist: len={}",
bbits.read().await.denylist.len()
);
}
} else {
error!("Failed to fetch denylist: {}", res.status());
}
tokio::time::sleep(BAD_BITS_UPDATE_INTERVAL).await;
}
}
}));
}
None
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use hex_literal::hex;
use http::StatusCode;
use iroh_resolver::dns_resolver::Config as DnsResolverConfig;
use iroh_rpc_client::{Client as RpcClient, Config as RpcClientConfig};
use iroh_unixfs::content_loader::{FullLoader, FullLoaderConfig, GatewayUrl};
use super::*;
use crate::config::Config;
/// Checks `BadBits::to_anchor` against known digests for several CID/path
/// combinations, including the empty path and the bare `/` path.
#[tokio::test]
async fn bad_bits_anchor() {
    const CID_V1: &str = "bafkreidyeivj7adnnac6ljvzj2e3rd5xdw3revw4da7mx2ckrstapoupoq";
    const CID_V0: &str = "QmdZ8zoh1iCsk8TdSAWN49tziH5MMn8XPvJcWmpFD1ygB7";

    // (cid, path, expected anchor bytes)
    let cases: [(&str, &str, [u8; 32]); 5] = [
        (
            CID_V1,
            "",
            hex!("d572cfd7fca1f89293f2d71270c51d82445b4502207a0df0707586b3e799521b"),
        ),
        (
            CID_V1,
            "/",
            hex!("d572cfd7fca1f89293f2d71270c51d82445b4502207a0df0707586b3e799521b"),
        ),
        (
            CID_V1,
            "/test",
            hex!("b62182173b68ef7ffb1ea5053717b700b80a21a5c28501900b050d704099b3c5"),
        ),
        (
            CID_V0,
            "",
            hex!("1a0e25ca02cd2c97af7e200b9b1a1db11c763473d60c12b8193078c95bbf917f"),
        ),
        (
            CID_V0,
            "/test",
            hex!("bf61fece5f55f922abf36e852b7c329b30350fdcba0d51328c95f33bab0bd5e9"),
        ),
    ];

    for (cid_str, path, expected) in cases {
        let cid = Cid::from_str(cid_str).unwrap();
        let anchor = BadBits::to_anchor(cid, path);
        assert_eq!(anchor.value, expected, "cid={cid_str} path={path:?}");
    }
}
#[tokio::test]
async fn gateway_bad_bits() {
let bad_cid =
Cid::from_str("bafkreidyeivj7adnnac6ljvzj2e3rd5xdw3revw4da7mx2ckrstapoupoq").unwrap();
let bad_path = "bad/foo.jpeg";
let good_cid =
Cid::from_str("bafkreieq5jui4j25lacwomsqgjeswwl3y5zcdrresptwgmfylxo2depppq").unwrap();
let good_path = "good/foo.jpeg";
let bad_cid_2 =
Cid::from_str("bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi").unwrap();
let mut bbits = BadBits::new();
let mut deny_list = HashSet::<BadBitsAnchor>::new();
deny_list.insert(BadBits::to_anchor(bad_cid, ""));
deny_list.insert(BadBits::to_anchor(bad_cid, bad_path));
deny_list.insert(BadBits::to_anchor(bad_cid_2, bad_path));
bbits.update(deny_list);
let mut config = Config::new(
0,
RpcClientConfig {
gateway_addr: None,
p2p_addr: None,
store_addr: None,
channels: Some(1),
},
);
config.redirect_to_subdomain = false;
config.set_default_headers();
let rpc_addr = "irpc://0.0.0.0:0".parse().unwrap();
let rpc_client = RpcClient::new(config.rpc_client.clone()).await.unwrap();
let loader_config = FullLoaderConfig {
http_gateways: config
.http_resolvers
.iter()
.flatten()
.map(|u| GatewayUrl::from_str(u).unwrap())
.collect(),
indexer: config.indexer_endpoint.as_ref().map(|p| p.parse().unwrap()),
};
let content_loader =
FullLoader::new(rpc_client.clone(), loader_config).expect("invalid config");
let handler = crate::core::Core::new(
Arc::new(config),
rpc_addr,
Arc::new(Some(RwLock::new(bbits))),
content_loader,
DnsResolverConfig::default(),
)
.await
.unwrap();
let server = handler.server();
let addr = server.local_addr();
let core_task = tokio::spawn(async move {
server.await.unwrap();
});
let uri = hyper::Uri::builder()
.scheme("http")
.authority(format!("localhost:{}", addr.port()))
.path_and_query(format!("/ipfs/{bad_cid}"))
.build()
.unwrap();
let client = hyper::Client::new();
let res = client.get(uri).await.unwrap();
assert_eq!(StatusCode::GONE, res.status());
let uri = hyper::Uri::builder()
.scheme("http")
.authority(format!("localhost:{}", addr.port()))
.path_and_query(format!("/ipfs/{bad_cid}/{bad_path}"))
.build()
.unwrap();
let client = hyper::Client::new();
let res = client.get(uri).await.unwrap();
assert_eq!(StatusCode::GONE, res.status());
let uri = hyper::Uri::builder()
.scheme("http")
.authority(format!("localhost:{}", addr.port()))
.path_and_query(format!("/ipfs/{bad_cid_2}/{bad_path}"))
.build()
.unwrap();
let client = hyper::Client::new();
let res = client.get(uri).await.unwrap();
let status = res.status();
assert_eq!(StatusCode::GONE, status);
let uri = hyper::Uri::builder()
.scheme("http")
.authority(format!("localhost:{}", addr.port()))
.path_and_query(format!("/ipfs/{bad_cid_2}/{bad_path}?format=raw"))
.build()
.unwrap();
let client = hyper::Client::new();
let res = client.get(uri).await.unwrap();
let status = res.status();
assert_eq!(StatusCode::GONE, status);
let uri = hyper::Uri::builder()
.scheme("http")
.authority(format!("localhost:{}", addr.port()))
.path_and_query(format!("/ipfs/{bad_cid}/{good_path}"))
.build()
.unwrap();
let client = hyper::Client::new();
let res = client.get(uri).await.unwrap();
assert!(res.status() != StatusCode::GONE);
let uri = hyper::Uri::builder()
.scheme("http")
.authority(format!("localhost:{}", addr.port()))
.path_and_query(format!("/ipfs/{bad_cid_2}/{good_path}"))
.build()
.unwrap();
let client = hyper::Client::new();
let res = client.get(uri).await.unwrap();
assert!(res.status() != StatusCode::GONE);
let uri = hyper::Uri::builder()
.scheme("http")
.authority(format!("localhost:{}", addr.port()))
.path_and_query(format!("/ipfs/{good_cid}"))
.build()
.unwrap();
let client = hyper::Client::new();
let res = client.get(uri).await.unwrap();
assert!(res.status() != StatusCode::GONE);
core_task.abort();
core_task.await.unwrap_err();
}
}