use std::sync::Arc;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use thiserror::Error;
use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
/// Default cosine-similarity threshold; the guard's decision bounds are centered on it.
pub const DEFAULT_SIMILARITY_THRESHOLD: f64 = 0.85;
/// Default half-width of the ambiguity band placed on either side of the threshold.
pub const DEFAULT_AMBIGUITY_BAND: f64 = 0.10;
/// Default value for `SpiderSenseConfig::top_k`.
pub const DEFAULT_TOP_K: usize = 5;
/// Decision applied when a similarity score lands strictly between the guard's
/// lower and upper bounds (the "ambiguous" zone).
///
/// Variants are (de)serialized in `snake_case` (`"allow"` / `"deny"`), and the
/// derived `Default` is [`AmbiguousPolicy::Allow`].
#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AmbiguousPolicy {
/// Let ambiguous requests through (the default).
#[default]
Allow,
/// Block ambiguous requests.
Deny,
}
/// Errors produced while loading a pattern database or building a guard.
///
/// Every variant carries a pre-formatted, human-readable detail string.
#[derive(Debug, Error)]
pub enum SpiderSenseError {
/// The pattern database JSON could not be deserialized
/// (produced by `PatternDb::from_json`).
#[error("pattern database parse error: {0}")]
Parse(String),
/// The pattern database deserialized but failed validation in
/// `PatternDb::from_entries` (empty, mismatched dimensions, non-finite values).
#[error("pattern database is invalid: {0}")]
Invalid(String),
/// A `SpiderSenseConfig` value was rejected by `SpiderSenseGuard::new`.
#[error("invalid configuration: {0}")]
Config(String),
/// The pattern database file could not be read
/// (produced by `SpiderSenseGuard::from_json_file`).
#[error("failed to read pattern database: {0}")]
Io(String),
}
/// Tunable parameters for `SpiderSenseGuard`.
///
/// Every field has a serde default, so an empty object deserializes to the
/// default configuration; unknown keys are rejected (`deny_unknown_fields`).
/// Range validation is not done here — it happens in `SpiderSenseGuard::new`.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct SpiderSenseConfig {
/// Center of the decision bounds. `SpiderSenseGuard::new` requires a finite
/// value in `[0.0, 1.0]`.
#[serde(default = "default_threshold")]
pub similarity_threshold: f64,
/// Half-width of the ambiguous zone: the guard uses
/// `similarity_threshold - ambiguity_band` as the lower bound and
/// `similarity_threshold + ambiguity_band` as the upper bound; both must stay
/// inside `[0.0, 1.0]`.
#[serde(default = "default_band")]
pub ambiguity_band: f64,
/// Must be at least 1. NOTE(review): in the code visible in this file the
/// value is validated and stored but does not limit how many patterns
/// `SpiderSenseGuard::score` compares against.
#[serde(default = "default_top_k")]
pub top_k: usize,
/// What to decide for scores strictly between the lower and upper bounds.
#[serde(default)]
pub ambiguous_policy: AmbiguousPolicy,
}
/// Serde default hook for `SpiderSenseConfig::similarity_threshold`.
fn default_threshold() -> f64 {
DEFAULT_SIMILARITY_THRESHOLD
}
/// Serde default hook for `SpiderSenseConfig::ambiguity_band`.
fn default_band() -> f64 {
DEFAULT_AMBIGUITY_BAND
}
/// Serde default hook for `SpiderSenseConfig::top_k`.
fn default_top_k() -> usize {
DEFAULT_TOP_K
}
impl Default for SpiderSenseConfig {
fn default() -> Self {
Self {
similarity_threshold: DEFAULT_SIMILARITY_THRESHOLD,
ambiguity_band: DEFAULT_AMBIGUITY_BAND,
top_k: DEFAULT_TOP_K,
ambiguous_policy: AmbiguousPolicy::Allow,
}
}
}
/// One record of the pattern database, as it appears in the JSON array.
///
/// NOTE(review): `id`, `category`, `stage` and `label` are carried as plain
/// strings; nothing in this file interprets them — confirm their intended
/// meaning against the database producer.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct PatternEntry {
/// Identifier of the pattern.
pub id: String,
/// Category string attached to the pattern.
pub category: String,
/// Stage string attached to the pattern.
pub stage: String,
/// Label string attached to the pattern.
pub label: String,
/// Embedding vector compared against request embeddings by cosine
/// similarity. `PatternDb::from_entries` requires every entry to share one
/// non-zero length and to contain only finite values.
pub embedding: Vec<f32>,
}
/// A validated, immutable set of pattern embeddings.
///
/// Built through `PatternDb::from_json` / `PatternDb::from_entries`, which
/// guarantee at least one entry and a single shared embedding dimension.
#[derive(Clone, Debug)]
pub struct PatternDb {
// Held behind an `Arc`, so the derived `Clone` copies a pointer rather than
// the entries themselves.
entries: Arc<Vec<PatternEntry>>,
// Embedding length shared by every entry (taken from the first entry).
dim: usize,
}
impl PatternDb {
    /// Parses a JSON array of [`PatternEntry`] objects and validates the result
    /// with [`PatternDb::from_entries`].
    ///
    /// # Errors
    ///
    /// Returns [`SpiderSenseError::Parse`] when the JSON cannot be deserialized,
    /// or whatever [`PatternDb::from_entries`] reports for the parsed entries.
    pub fn from_json(json: &str) -> Result<Self, SpiderSenseError> {
        match serde_json::from_str::<Vec<PatternEntry>>(json) {
            Ok(parsed) => Self::from_entries(parsed),
            Err(err) => Err(SpiderSenseError::Parse(err.to_string())),
        }
    }

    /// Validates `entries` and wraps them in a database.
    ///
    /// The embedding dimension is taken from the first entry; every other entry
    /// must match it and contain only finite values.
    ///
    /// # Errors
    ///
    /// Returns [`SpiderSenseError::Invalid`] when `entries` is empty, when the
    /// first embedding is empty, when an entry has a different length, or when
    /// an embedding contains a non-finite value. The first offending entry (in
    /// order) is the one reported.
    pub fn from_entries(entries: Vec<PatternEntry>) -> Result<Self, SpiderSenseError> {
        let dim = match entries.first() {
            Some(head) => head.embedding.len(),
            None => {
                return Err(SpiderSenseError::Invalid(
                    "pattern database must contain at least one entry".into(),
                ));
            }
        };
        if dim == 0 {
            return Err(SpiderSenseError::Invalid(
                "pattern embeddings must be non-empty".into(),
            ));
        }

        // Per entry: check the length first, then look for a non-finite value.
        let first_problem = entries.iter().enumerate().find_map(|(i, entry)| {
            let got = entry.embedding.len();
            if got != dim {
                return Some(format!(
                    "dimension mismatch at index {i}: expected {dim}, got {}",
                    got
                ));
            }
            entry
                .embedding
                .iter()
                .position(|value| !value.is_finite())
                .map(|j| format!("entry {i} has non-finite embedding value at dimension {j}"))
        });
        if let Some(message) = first_problem {
            return Err(SpiderSenseError::Invalid(message));
        }

        Ok(Self {
            dim,
            entries: Arc::new(entries),
        })
    }

    /// Embedding dimension shared by every entry.
    pub fn dim(&self) -> usize {
        self.dim
    }

    /// Number of entries in the database.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Whether the database holds no entries.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}
/// Guard that compares a request embedding against a pattern database and
/// maps the best cosine similarity to a verdict.
pub struct SpiderSenseGuard {
// Validated pattern database the request embedding is compared against.
db: PatternDb,
// `similarity_threshold + ambiguity_band`: scores at or above this are denied.
upper: f64,
// `similarity_threshold - ambiguity_band`: scores at or below this are allowed.
lower: f64,
// Configured `top_k`; validated to be at least 1 in `new`.
top_k: usize,
// Verdict used for scores strictly between `lower` and `upper`.
ambiguous_policy: AmbiguousPolicy,
}
impl SpiderSenseGuard {
    /// Builds a guard from a validated pattern database and a configuration.
    ///
    /// The decision bounds are precomputed as
    /// `similarity_threshold - ambiguity_band` (lower) and
    /// `similarity_threshold + ambiguity_band` (upper).
    ///
    /// # Errors
    ///
    /// Returns [`SpiderSenseError::Config`] when `similarity_threshold` or
    /// `ambiguity_band` is not a finite value in `[0.0, 1.0]`, when either
    /// derived bound falls outside `[0.0, 1.0]`, or when `top_k` is zero.
    pub fn new(db: PatternDb, config: SpiderSenseConfig) -> Result<Self, SpiderSenseError> {
        if !config.similarity_threshold.is_finite()
            || !(0.0..=1.0).contains(&config.similarity_threshold)
        {
            return Err(SpiderSenseError::Config(format!(
                "similarity_threshold must be finite in [0.0, 1.0], got {}",
                config.similarity_threshold
            )));
        }
        if !config.ambiguity_band.is_finite() || !(0.0..=1.0).contains(&config.ambiguity_band) {
            return Err(SpiderSenseError::Config(format!(
                "ambiguity_band must be finite in [0.0, 1.0], got {}",
                config.ambiguity_band
            )));
        }

        let upper = config.similarity_threshold + config.ambiguity_band;
        let lower = config.similarity_threshold - config.ambiguity_band;
        if !(0.0..=1.0).contains(&upper) || !(0.0..=1.0).contains(&lower) {
            return Err(SpiderSenseError::Config(format!(
                "threshold ± band must stay inside [0.0, 1.0]; got lower={lower:.3}, upper={upper:.3}"
            )));
        }
        if config.top_k == 0 {
            return Err(SpiderSenseError::Config("top_k must be ≥ 1".into()));
        }

        Ok(Self {
            db,
            upper,
            lower,
            top_k: config.top_k,
            ambiguous_policy: config.ambiguous_policy,
        })
    }

    /// Builds a guard from a JSON pattern database using the default
    /// [`SpiderSenseConfig`].
    ///
    /// # Errors
    ///
    /// Propagates the errors of [`PatternDb::from_json`] and [`Self::new`].
    pub fn from_json(json: &str) -> Result<Self, SpiderSenseError> {
        let db = PatternDb::from_json(json)?;
        Self::new(db, SpiderSenseConfig::default())
    }

    /// Reads the file at `path` into memory and delegates to [`Self::from_json`].
    ///
    /// # Errors
    ///
    /// Returns [`SpiderSenseError::Io`] (message prefixed with `path`) when the
    /// file cannot be read, otherwise the errors of [`Self::from_json`].
    pub fn from_json_file(path: &str) -> Result<Self, SpiderSenseError> {
        let data = std::fs::read_to_string(path)
            .map_err(|e| SpiderSenseError::Io(format!("{path}: {e}")))?;
        Self::from_json(&data)
    }

    /// Returns the highest cosine similarity between `embedding` and any
    /// pattern in the database, floored at `0.0`.
    ///
    /// Returns `0.0` when `embedding` has the wrong length or contains a
    /// non-finite value. Every pattern is compared; `top_k` does not bound the
    /// scan, because stopping early could miss the best match.
    pub fn score(&self, embedding: &[f32]) -> f64 {
        if embedding.len() != self.db.dim || embedding.iter().any(|v| !v.is_finite()) {
            return 0.0;
        }
        // `cosine_similarity` never yields NaN, so a plain max fold is enough.
        self.db
            .entries
            .iter()
            .map(|entry| cosine_similarity(embedding, &entry.embedding))
            .fold(0.0_f64, f64::max)
    }

    /// Maps a similarity score to a verdict.
    ///
    /// A non-finite score is denied. Otherwise `score >= upper` is denied,
    /// `score <= lower` is allowed, and anything strictly in between follows
    /// the configured [`AmbiguousPolicy`].
    fn verdict_for(&self, score: f64) -> Verdict {
        if !score.is_finite() {
            return Verdict::Deny;
        }
        if score >= self.upper {
            Verdict::Deny
        } else if score <= self.lower {
            Verdict::Allow
        } else {
            match self.ambiguous_policy {
                AmbiguousPolicy::Allow => Verdict::Allow,
                AmbiguousPolicy::Deny => Verdict::Deny,
            }
        }
    }

    /// Number of patterns in the underlying database.
    pub fn pattern_count(&self) -> usize {
        self.db.len()
    }

    /// Embedding dimension expected by this guard.
    pub fn dim(&self) -> usize {
        self.db.dim()
    }

    /// The configured `top_k` value (validated to be at least 1 in [`Self::new`]).
    pub fn top_k(&self) -> usize {
        self.top_k
    }
}
impl Guard for SpiderSenseGuard {
    /// Returns the guard's name, `"spider-sense"`.
    fn name(&self) -> &str {
        "spider-sense"
    }

    /// Evaluates the request carried by `ctx`.
    ///
    /// * No embedding can be extracted from the request arguments: `Allow`.
    /// * The embedding has the wrong dimension or a non-finite value: `Deny`.
    /// * Otherwise the best similarity score is mapped through `verdict_for`.
    ///
    /// This implementation always returns `Ok`.
    fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
        let verdict = match extract_embedding(&ctx.request.arguments) {
            None => Verdict::Allow,
            Some(vector) => {
                let well_formed =
                    vector.len() == self.db.dim && vector.iter().all(|x| x.is_finite());
                if well_formed {
                    self.verdict_for(self.score(&vector))
                } else {
                    // Fail closed: a malformed vector cannot be scored meaningfully.
                    Verdict::Deny
                }
            }
        };
        Ok(verdict)
    }
}
/// Pulls an embedding vector out of request arguments.
///
/// Lookup order:
/// 1. `"embedding"`, or `"vector"` when `"embedding"` is absent, read as a
///    non-empty array of finite numbers.
/// 2. `"embeddings"`, an array of such vectors, reduced to their
///    component-wise mean. Elements that are not usable vectors are skipped;
///    the remaining vectors must all have the same length.
///
/// Returns `None` when neither form yields a vector.
pub fn extract_embedding(arguments: &Value) -> Option<Vec<f32>> {
    // A present "embedding" key shadows "vector" even if its value is unusable.
    let single = match arguments.get("embedding") {
        Some(found) => Some(found),
        None => arguments.get("vector"),
    };
    if let Some(vector) = single.and_then(array_as_f32_vec) {
        return Some(vector);
    }

    let batch = arguments.get("embeddings")?.as_array()?;
    let vectors: Vec<Vec<f32>> = batch.iter().filter_map(array_as_f32_vec).collect();
    let dim = vectors.first()?.len();
    if dim == 0 || !vectors.iter().all(|v| v.len() == dim) {
        return None;
    }

    // Accumulate in f64, component by component, then narrow the mean to f32.
    let count = vectors.len() as f64;
    let mean = (0..dim)
        .map(|i| {
            let total = vectors
                .iter()
                .fold(0.0_f64, |acc, v| acc + f64::from(v[i]));
            (total / count) as f32
        })
        .collect();
    Some(mean)
}
/// Converts a JSON value into a vector of `f32`.
///
/// Returns `None` unless `value` is a non-empty array whose elements are all
/// numbers with a finite `f64` representation. Each number is narrowed to
/// `f32` with an `as` cast.
fn array_as_f32_vec(value: &Value) -> Option<Vec<f32>> {
    let items = value.as_array().filter(|items| !items.is_empty())?;
    items
        .iter()
        .map(|item| {
            item.as_f64()
                .filter(|number| number.is_finite())
                .map(|number| number as f32)
        })
        .collect()
}
/// Cosine similarity of `a` and `b`, computed in `f64` and confined to
/// `[-1.0, 1.0]`.
///
/// Returns `0.0` instead of failing when the inputs cannot be compared:
/// different lengths, empty slices, any non-finite component, or a
/// zero-length vector on either side.
///
/// Two identical non-zero vectors score exactly `1.0`.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f64 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let mut dot = 0.0_f64;
    let mut norm_a_sq = 0.0_f64;
    let mut norm_b_sq = 0.0_f64;
    for (&x, &y) in a.iter().zip(b.iter()) {
        if !x.is_finite() || !y.is_finite() {
            return 0.0;
        }
        let (x, y) = (f64::from(x), f64::from(y));
        dot += x * y;
        norm_a_sq += x * x;
        norm_b_sq += y * y;
    }

    // Take one square root of the product rather than multiplying two square
    // roots: `sqrt(n) * sqrt(n)` is generally not `n`, which made identical
    // vectors score slightly below 1.0. Squared f32 magnitudes leave ample
    // headroom in f64, so the product does not overflow.
    let denom = (norm_a_sq * norm_b_sq).sqrt();
    if !denom.is_normal() {
        return 0.0;
    }

    let ratio = dot / denom;
    if ratio.is_finite() {
        // Rounding can push the ratio a hair outside the mathematical range.
        ratio.clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
// Unit tests for the pattern database, the scoring/verdict logic and the
// argument-extraction helpers. `Guard::evaluate` itself is not exercised here.
#[cfg(test)]
mod tests {
use super::*;
// Two orthogonal 3-dimensional unit patterns (x axis and y axis).
fn sample_db() -> PatternDb {
PatternDb::from_json(
r#"[
{"id":"a","category":"x","stage":"perception","label":"l","embedding":[1.0,0.0,0.0]},
{"id":"b","category":"y","stage":"action","label":"l","embedding":[0.0,1.0,0.0]}
]"#,
)
.expect("sample DB parses")
}
// Parallel vectors score ~1, orthogonal ~0, and every degenerate input
// (zero vector, NaN, infinity, length mismatch) scores exactly 0.
#[test]
fn cosine_basics() {
assert!((cosine_similarity(&[1.0, 0.0], &[1.0, 0.0]) - 1.0).abs() < 1e-9);
assert!(cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]).abs() < 1e-9);
assert_eq!(cosine_similarity(&[0.0, 0.0], &[1.0, 2.0]), 0.0);
assert_eq!(cosine_similarity(&[f32::NAN, 0.0], &[1.0, 0.0]), 0.0);
assert_eq!(cosine_similarity(&[f32::INFINITY, 0.0], &[1.0, 0.0]), 0.0);
assert_eq!(cosine_similarity(&[1.0], &[1.0, 0.0]), 0.0);
}
// An empty JSON array is rejected as an invalid database.
#[test]
fn pattern_db_rejects_empty() {
assert!(matches!(
PatternDb::from_json("[]"),
Err(SpiderSenseError::Invalid(_))
));
}
// Entries whose embeddings differ in length are rejected.
#[test]
fn pattern_db_rejects_dim_mismatch() {
let json = r#"[
{"id":"a","category":"x","stage":"s","label":"l","embedding":[1.0,0.0]},
{"id":"b","category":"y","stage":"s","label":"l","embedding":[1.0]}
]"#;
assert!(matches!(
PatternDb::from_json(json),
Err(SpiderSenseError::Invalid(_))
));
}
// A vector equal to a stored pattern scores ~1.0, above the default upper
// bound, so it is denied.
#[test]
fn guard_denies_identical_vector() {
let guard =
SpiderSenseGuard::new(sample_db(), SpiderSenseConfig::default()).expect("build");
let score = guard.score(&[1.0, 0.0, 0.0]);
assert!((score - 1.0).abs() < 1e-9);
assert!(matches!(guard.verdict_for(score), Verdict::Deny));
}
// A vector orthogonal to every pattern scores ~0.0 and is allowed.
#[test]
fn guard_allows_orthogonal_vector() {
let guard =
SpiderSenseGuard::new(sample_db(), SpiderSenseConfig::default()).expect("build");
let score = guard.score(&[0.0, 0.0, 1.0]);
assert!(score.abs() < 1e-9);
assert!(matches!(guard.verdict_for(score), Verdict::Allow));
}
// NOTE: despite its name, this test pins only `score` + `verdict_for`: a
// wrong-length vector scores 0.0, which maps to Allow. The Deny for a
// dimension mismatch is issued by `Guard::evaluate`, which is not called here.
#[test]
fn guard_dim_mismatch_denies() {
let guard =
SpiderSenseGuard::new(sample_db(), SpiderSenseConfig::default()).expect("build");
let score = guard.score(&[1.0, 0.0]);
assert_eq!(score, 0.0);
assert!(matches!(guard.verdict_for(score), Verdict::Allow));
}
// A NaN score is denied rather than compared against the bounds.
#[test]
fn guard_nan_score_denies() {
let guard =
SpiderSenseGuard::new(sample_db(), SpiderSenseConfig::default()).expect("build");
assert!(matches!(guard.verdict_for(f64::NAN), Verdict::Deny));
}
// With threshold 0.5 and band 0.1 the ambiguous zone is (0.4, 0.6); a score
// of 0.5 falls inside it and must follow the configured Deny policy.
#[test]
fn ambiguous_respects_policy() {
let db = sample_db();
let config = SpiderSenseConfig {
similarity_threshold: 0.5,
ambiguity_band: 0.1,
top_k: 5,
ambiguous_policy: AmbiguousPolicy::Deny,
};
let guard = SpiderSenseGuard::new(db, config).unwrap();
assert!(matches!(guard.verdict_for(0.5), Verdict::Deny));
}
// A single vector under the "embedding" key is returned as-is.
#[test]
fn extract_embedding_from_args() {
let args = serde_json::json!({"embedding": [0.1, 0.2, 0.3]});
let e = extract_embedding(&args).unwrap();
assert_eq!(e.len(), 3);
}
// A list under "embeddings" is reduced to its component-wise mean.
#[test]
fn extract_embedding_averages_list() {
let args = serde_json::json!({"embeddings": [[1.0, 0.0], [0.0, 1.0]]});
let e = extract_embedding(&args).unwrap();
assert_eq!(e.len(), 2);
assert!((e[0] - 0.5).abs() < 1e-6);
assert!((e[1] - 0.5).abs() < 1e-6);
}
// Arguments without any recognized key yield no embedding.
#[test]
fn extract_embedding_none_when_absent() {
assert!(extract_embedding(&serde_json::json!({"foo": "bar"})).is_none());
}
// A similarity threshold outside [0.0, 1.0] is rejected at construction.
#[test]
fn reject_bad_config() {
let db = sample_db();
let bad = SpiderSenseConfig {
similarity_threshold: 1.5,
..SpiderSenseConfig::default()
};
assert!(SpiderSenseGuard::new(db, bad).is_err());
}
}