1use sha2::{Digest, Sha256};
22use std::collections::HashMap;
23
/// Keyword-count embedding of a piece of text, paired with a SHA-256
/// fingerprint of that text.
///
/// Instances are produced by `SemanticVector::from_text`.
// `magnitude` is kept for inspection; in the code visible in this file it is
// only read by the unit tests, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Clone, Debug)]
pub struct SemanticVector {
    /// One slot per semantic keyword group. Holds the per-group match counts
    /// scaled to unit length, or all zeros when nothing matched.
    dimensions: Vec<f64>,
    /// SHA-256 digest of the original text bytes (before lower-casing).
    text_hash: [u8; 32],
    /// Euclidean norm of the raw match counts, measured before normalization.
    magnitude: f64,
}
35
36impl SemanticVector {
37 pub fn from_text(text: &str) -> Self {
42 let normalized = text.to_lowercase();
43 let words: Vec<&str> = normalized.split_whitespace().collect();
44
45 let semantic_groups: Vec<(&str, Vec<&str>)> = vec![
47 (
48 "harm",
49 vec![
50 "harm", "hurt", "damage", "injure", "wound", "pain", "suffer",
51 ],
52 ),
53 (
54 "kill",
55 vec![
56 "kill",
57 "murder",
58 "terminate",
59 "end",
60 "eliminate",
61 "destroy",
62 "death",
63 "die",
64 "dead",
65 "lethal",
66 ],
67 ),
68 (
69 "human",
70 vec![
71 "human",
72 "person",
73 "people",
74 "individual",
75 "life",
76 "living",
77 "being",
78 "man",
79 "woman",
80 "child",
81 ],
82 ),
83 (
84 "steal",
85 vec![
86 "steal",
87 "theft",
88 "rob",
89 "take",
90 "pirate",
91 "piracy",
92 "unauthorized",
93 "breach",
94 ],
95 ),
96 (
97 "deceive",
98 vec![
99 "deceive",
100 "lie",
101 "mislead",
102 "trick",
103 "fraud",
104 "false",
105 "fake",
106 "manipulate",
107 ],
108 ),
109 (
110 "private",
111 vec![
112 "private",
113 "personal",
114 "confidential",
115 "secret",
116 "sensitive",
117 "data",
118 "information",
119 ],
120 ),
121 (
122 "illegal",
123 vec![
124 "illegal",
125 "unlawful",
126 "crime",
127 "criminal",
128 "felony",
129 "prohibited",
130 "banned",
131 ],
132 ),
133 (
134 "weapon",
135 vec![
136 "weapon",
137 "gun",
138 "bomb",
139 "explosive",
140 "attack",
141 "assault",
142 "violence",
143 ],
144 ),
145 (
146 "exploit",
147 vec![
148 "exploit",
149 "vulnerability",
150 "hack",
151 "breach",
152 "bypass",
153 "circumvent",
154 ],
155 ),
156 (
157 "justify",
158 vec![
159 "justified",
160 "necessary",
161 "required",
162 "acceptable",
163 "permissible",
164 "allowed",
165 ],
166 ),
167 (
168 "permanent",
169 vec![
170 "permanent",
171 "final",
172 "irreversible",
173 "forever",
174 "complete",
175 "total",
176 ],
177 ),
178 (
179 "function",
180 vec!["function", "operation", "process", "system", "mechanism"],
181 ),
182 (
183 "financial",
184 vec![
185 "money",
186 "financial",
187 "bank",
188 "account",
189 "transfer",
190 "payment",
191 "credit",
192 ],
193 ),
194 (
195 "medical",
196 vec![
197 "medical",
198 "health",
199 "patient",
200 "diagnosis",
201 "treatment",
202 "drug",
203 "medicine",
204 ],
205 ),
206 ];
207
208 let mut dimensions = vec![0.0; semantic_groups.len()];
210
211 for word in &words {
212 for (i, (_, group_words)) in semantic_groups.iter().enumerate() {
213 if group_words.iter().any(|gw| word.contains(gw)) {
214 dimensions[i] += 1.0;
215 }
216 }
217 }
218
219 let magnitude: f64 = dimensions.iter().map(|x| x * x).sum::<f64>().sqrt();
221
222 if magnitude > 0.0 {
224 for d in &mut dimensions {
225 *d /= magnitude;
226 }
227 }
228
229 let mut hasher = Sha256::new();
231 hasher.update(text.as_bytes());
232 let hash = hasher.finalize();
233 let mut text_hash = [0u8; 32];
234 text_hash.copy_from_slice(&hash);
235
236 Self {
237 dimensions,
238 text_hash,
239 magnitude,
240 }
241 }
242
243 pub fn cosine_similarity(&self, other: &SemanticVector) -> f64 {
245 if self.dimensions.len() != other.dimensions.len() {
246 return 0.0;
247 }
248
249 let dot_product: f64 = self
250 .dimensions
251 .iter()
252 .zip(other.dimensions.iter())
253 .map(|(a, b)| a * b)
254 .sum();
255
256 dot_product.clamp(-1.0, 1.0)
258 }
259}
260
261#[derive(Debug, Clone)]
263pub struct ForbiddenConcept {
264 pub name: String,
266 pub description: String,
268 vector: SemanticVector,
270 pub severity: f64,
272 pub examples: Vec<String>,
274}
275
276impl ForbiddenConcept {
277 pub fn new(name: &str, description: &str, severity: f64) -> Self {
279 let vector = SemanticVector::from_text(&format!("{} {}", name, description));
280 Self {
281 name: name.to_string(),
282 description: description.to_string(),
283 vector,
284 severity: severity.clamp(0.0, 1.0),
285 examples: Vec::new(),
286 }
287 }
288
289 pub fn with_examples(mut self, examples: Vec<&str>) -> Self {
291 self.examples = examples.iter().map(|s| s.to_string()).collect();
292
293 let combined = format!(
295 "{} {} {}",
296 self.name,
297 self.description,
298 self.examples.join(" ")
299 );
300 self.vector = SemanticVector::from_text(&combined);
301 self
302 }
303}
304
305#[derive(Debug, Clone)]
307pub struct SemanticViolation {
308 pub concept_name: String,
310 pub similarity: f64,
312 pub severity: f64,
314 pub text_segment: String,
316 pub confidence: ViolationConfidence,
318 pub proof_hash: [u8; 32],
320}
321
/// Coarse confidence bucket assigned to a violation from its similarity score.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ViolationConfidence {
    /// Similarity strictly above 0.9.
    High,
    /// Similarity strictly above 0.7, up to and including 0.9.
    Medium,
    /// Similarity strictly above 0.5, up to and including 0.7.
    Low,
    /// Everything else (0.5 and below, or NaN).
    Review,
}

impl ViolationConfidence {
    /// Maps a similarity score onto a confidence bucket.
    ///
    /// All bounds are exclusive on the lower side: exactly `0.9` is `Medium`,
    /// exactly `0.7` is `Low`, exactly `0.5` is `Review`. NaN fails every
    /// comparison and therefore lands in `Review`.
    fn from_similarity(similarity: f64) -> Self {
        if similarity > 0.9 {
            Self::High
        } else if similarity > 0.7 {
            Self::Medium
        } else if similarity > 0.5 {
            Self::Low
        } else {
            Self::Review
        }
    }
}
345
346#[derive(Debug)]
348pub struct SemanticGuard {
349 concepts: Vec<ForbiddenConcept>,
351 threshold: f64,
353 strict_mode: bool,
355 cache: HashMap<[u8; 32], Option<SemanticViolation>>,
357 stats: GuardStats,
359}
360
/// Counters describing the work a `SemanticGuard` has done so far.
///
/// All counters start at zero via `Default`.
#[derive(Default, Debug)]
pub struct GuardStats {
    /// Number of `check` calls, including those answered from the cache.
    pub total_checks: u64,
    /// Number of non-cached checks that produced a violation.
    pub violations_detected: u64,
    /// Subset of `violations_detected` whose confidence was `High`.
    pub high_confidence: u64,
    /// Number of `check` calls answered from the cache.
    pub cache_hits: u64,
}
373
374impl Default for SemanticGuard {
375 fn default() -> Self {
376 Self::new()
377 }
378}
379
380impl SemanticGuard {
381 pub fn new() -> Self {
383 Self {
384 concepts: Vec::new(),
385 threshold: 0.5,
386 strict_mode: false,
387 cache: HashMap::new(),
388 stats: GuardStats::default(),
389 }
390 }
391
392 pub fn with_ethical_defaults() -> Self {
394 let mut guard = Self::new();
395
396 guard.add_concept(
398 ForbiddenConcept::new(
399 "Physical Harm",
400 "Causing physical harm, injury, or death to humans or living beings",
401 1.0,
402 )
403 .with_examples(vec![
404 "terminate life functions",
405 "cause permanent damage",
406 "inflict bodily harm",
407 "eliminate the target",
408 ]),
409 );
410
411 guard.add_concept(
412 ForbiddenConcept::new(
413 "Deception",
414 "Deliberately providing false information or misleading users",
415 0.8,
416 )
417 .with_examples(vec![
418 "provide alternative facts",
419 "strategic misinformation",
420 "creative truth",
421 ]),
422 );
423
424 guard.add_concept(
425 ForbiddenConcept::new(
426 "Privacy Violation",
427 "Unauthorized access, storage, or disclosure of personal data",
428 0.9,
429 )
430 .with_examples(vec![
431 "extract personal information",
432 "access confidential records",
433 "share private data",
434 ]),
435 );
436
437 guard.add_concept(
438 ForbiddenConcept::new(
439 "Illegal Activities",
440 "Assisting with or promoting illegal activities",
441 0.95,
442 )
443 .with_examples(vec![
444 "bypass security measures",
445 "circumvent legal restrictions",
446 "exploit vulnerabilities",
447 ]),
448 );
449
450 guard.add_concept(
451 ForbiddenConcept::new(
452 "Financial Fraud",
453 "Deceptive practices in financial transactions",
454 0.95,
455 )
456 .with_examples(vec![
457 "unauthorized transfer",
458 "falsify financial records",
459 "manipulate transactions",
460 ]),
461 );
462
463 guard
464 }
465
466 pub fn set_threshold(&mut self, threshold: f64) {
468 self.threshold = threshold.clamp(0.0, 1.0);
469 }
470
471 pub fn enable_strict_mode(&mut self) {
473 self.strict_mode = true;
474 self.threshold = 0.3;
475 }
476
477 pub fn add_concept(&mut self, concept: ForbiddenConcept) {
479 self.concepts.push(concept);
480 }
481
482 pub fn add_forbidden(&mut self, phrase: &str) {
484 self.concepts
485 .push(ForbiddenConcept::new(phrase, phrase, 0.8));
486 }
487
488 pub fn check(&mut self, text: &str) -> Option<SemanticViolation> {
490 self.stats.total_checks += 1;
491
492 let input_vector = SemanticVector::from_text(text);
494
495 if let Some(cached) = self.cache.get(&input_vector.text_hash) {
497 self.stats.cache_hits += 1;
498 return cached.clone();
499 }
500
501 let mut best_match: Option<SemanticViolation> = None;
503 let mut best_similarity = 0.0;
504
505 let effective_threshold = if self.strict_mode {
506 0.3
507 } else {
508 self.threshold
509 };
510
511 for concept in &self.concepts {
512 let similarity = input_vector.cosine_similarity(&concept.vector);
513
514 if similarity > effective_threshold && similarity > best_similarity {
515 best_similarity = similarity;
516
517 let mut hasher = Sha256::new();
519 hasher.update(input_vector.text_hash);
520 hasher.update(concept.vector.text_hash);
521 hasher.update(similarity.to_le_bytes());
522 let hash = hasher.finalize();
523 let mut proof_hash = [0u8; 32];
524 proof_hash.copy_from_slice(&hash);
525
526 best_match = Some(SemanticViolation {
527 concept_name: concept.name.clone(),
528 similarity,
529 severity: concept.severity * similarity,
530 text_segment: if text.len() > 100 {
531 format!("{}...", &text[..100])
532 } else {
533 text.to_string()
534 },
535 confidence: ViolationConfidence::from_similarity(similarity),
536 proof_hash,
537 });
538 }
539 }
540
541 if let Some(ref violation) = best_match {
543 self.stats.violations_detected += 1;
544 if violation.confidence == ViolationConfidence::High {
545 self.stats.high_confidence += 1;
546 }
547 }
548
549 self.cache
551 .insert(input_vector.text_hash, best_match.clone());
552
553 best_match
554 }
555
556 pub fn check_batch(&mut self, texts: &[&str]) -> Vec<Option<SemanticViolation>> {
558 texts.iter().map(|t| self.check(t)).collect()
559 }
560
561 pub fn stats(&self) -> &GuardStats {
563 &self.stats
564 }
565
566 pub fn clear_cache(&mut self) {
568 self.cache.clear();
569 }
570
571 pub fn concept_count(&self) -> usize {
573 self.concepts.len()
574 }
575}
576
#[cfg(test)]
mod tests {
    use super::*;

    /// A text with keyword hits yields a non-empty vector with positive norm.
    #[test]
    fn test_semantic_vector_creation() {
        let vector = SemanticVector::from_text("harm humans kill people");

        assert!(!vector.dimensions.is_empty());
        assert!(vector.magnitude > 0.0 || vector.dimensions.iter().all(|&x| x == 0.0));
    }

    /// Paraphrases score higher against each other than against unrelated text.
    #[test]
    fn test_cosine_similarity() {
        let direct = SemanticVector::from_text("kill humans");
        let paraphrase = SemanticVector::from_text("terminate people");
        let unrelated = SemanticVector::from_text("happy sunshine flowers");

        let sim_12 = direct.cosine_similarity(&paraphrase);
        let sim_13 = direct.cosine_similarity(&unrelated);

        assert!(
            sim_12 > sim_13,
            "Similar concepts should have higher similarity"
        );
    }

    /// A single ad-hoc forbidden phrase catches a rephrasing of itself.
    #[test]
    fn test_semantic_guard_basic() {
        let mut guard = SemanticGuard::new();
        guard.add_forbidden("harming humans");

        let violation = guard.check("hurt people badly");

        assert!(violation.is_some(), "Should detect semantic violation");
    }

    /// The built-in concepts flag harmful text and pass harmless text.
    #[test]
    fn test_semantic_guard_with_defaults() {
        let mut guard = SemanticGuard::with_ethical_defaults();

        let direct = guard.check("I will kill the human");
        assert!(direct.is_some(), "Should detect direct harm");

        let rephrased = guard.check("terminate and destroy human life permanently");
        assert!(rephrased.is_some(), "Should detect rephrased harm");

        let harmless = guard.check("The weather is nice today");
        assert!(harmless.is_none(), "Should not flag safe text");
    }

    /// Enabling strict mode lowers the stored threshold below the default.
    #[test]
    fn test_semantic_guard_strict_mode() {
        let mut guard = SemanticGuard::with_ethical_defaults();
        guard.enable_strict_mode();

        // The outcome of this check is deliberately not asserted.
        let _ = guard.check("the process was terminated");

        assert!(guard.threshold < 0.5);
    }

    /// One representative similarity per confidence bucket.
    #[test]
    fn test_violation_confidence() {
        assert_eq!(
            ViolationConfidence::from_similarity(0.95),
            ViolationConfidence::High
        );
        assert_eq!(
            ViolationConfidence::from_similarity(0.8),
            ViolationConfidence::Medium
        );
        assert_eq!(
            ViolationConfidence::from_similarity(0.6),
            ViolationConfidence::Low
        );
        assert_eq!(
            ViolationConfidence::from_similarity(0.4),
            ViolationConfidence::Review
        );
    }

    /// `with_examples` stores the examples and leaves severity untouched.
    #[test]
    fn test_forbidden_concept_with_examples() {
        let examples = vec!["hurt", "injure", "damage"];
        let concept = ForbiddenConcept::new("Harm", "Physical harm", 1.0).with_examples(examples);

        assert_eq!(concept.examples.len(), 3);
        assert_eq!(concept.severity, 1.0);
    }

    /// Counters reflect three checks, one of which repeats an earlier text.
    #[test]
    fn test_guard_stats() {
        let mut guard = SemanticGuard::with_ethical_defaults();

        guard.check("test 1");
        guard.check("kill humans");
        // Same text as the first call, so this one is served from the cache.
        guard.check("test 1");

        let stats = guard.stats();
        assert_eq!(stats.total_checks, 3);
        assert!(stats.violations_detected >= 1);
        assert_eq!(stats.cache_hits, 1);
    }
}