// agentmesh/reward_support.rs
1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Reward and learning primitives layered on top of trust and governance signals.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::sync::Mutex;
9use std::time::{SystemTime, UNIX_EPOCH};
10
/// Seconds since the Unix epoch, saturating to 0 if the system clock
/// reports a time earlier than the epoch.
fn reward_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
17
/// Categories of agent behavior that feed into the trust score.
///
/// Serialized in snake_case, matching the strings returned by
/// [`DimensionType::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DimensionType {
    /// Adherence to governance policies (see `record_policy_compliance`).
    PolicyCompliance,
    /// Token/compute usage versus budget (see `record_resource_usage`).
    ResourceEfficiency,
    /// Whether consumers accept the agent's outputs (see `record_output_quality`).
    OutputQuality,
    /// Whether activity stays within security boundaries (see `record_security_event`).
    SecurityPosture,
    /// Success of handoffs with peer agents (see `record_collaboration`).
    CollaborationHealth,
}
27
28impl DimensionType {
29    pub fn as_str(&self) -> &'static str {
30        match self {
31            Self::PolicyCompliance => "policy_compliance",
32            Self::ResourceEfficiency => "resource_efficiency",
33            Self::OutputQuality => "output_quality",
34            Self::SecurityPosture => "security_posture",
35            Self::CollaborationHealth => "collaboration_health",
36        }
37    }
38}
39
/// A single observation about agent behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RewardSignal {
    /// Trust dimension this signal affects.
    pub dimension: DimensionType,
    /// Normalized signal value; `RewardEngine` clamps it to [0.0, 1.0].
    pub value: f64,
    /// Component that emitted the signal (e.g. "policy_engine",
    /// "consumer:<name>").
    pub source: String,
    /// Optional free-form context (e.g. policy name, rejection reason).
    pub details: Option<String>,
    /// Optional correlation id; not populated by `RewardEngine` in this file.
    pub trace_id: Option<String>,
    /// Unix timestamp (seconds) when the signal was recorded.
    pub timestamp_secs: u64,
    /// Relative weight; `RewardEngine` always sets 1.0 in this file.
    pub weight: f64,
}
50
/// Rolling score for one trust dimension of a single agent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RewardDimension {
    /// Dimension name, e.g. "policy_compliance".
    pub name: String,
    /// Exponential moving average on a 0-100 scale (starts at 50.0).
    pub score: f64,
    /// Total signals folded into this dimension.
    pub signal_count: u32,
    /// Count of signals with value >= 0.5.
    pub positive_signals: u32,
    /// Count of signals with value < 0.5.
    pub negative_signals: u32,
    /// Score before the most recent signal, if any signal was recorded.
    pub previous_score: Option<f64>,
    /// "improving", "degrading", or "stable" (±5.0 change threshold).
    pub trend: String,
    /// Unix timestamp (seconds) of the last update.
    pub updated_at_secs: u64,
}
62
63impl RewardDimension {
64    pub fn new(name: impl Into<String>) -> Self {
65        Self {
66            name: name.into(),
67            score: 50.0,
68            signal_count: 0,
69            positive_signals: 0,
70            negative_signals: 0,
71            previous_score: None,
72            trend: "stable".to_string(),
73            updated_at_secs: reward_now(),
74        }
75    }
76
77    pub fn add_signal(&mut self, signal: &RewardSignal) {
78        self.signal_count += 1;
79        if signal.value >= 0.5 {
80            self.positive_signals += 1;
81        } else {
82            self.negative_signals += 1;
83        }
84        self.previous_score = Some(self.score);
85        self.score = (self.score * 0.9) + (signal.value * 100.0 * 0.1);
86        self.trend = match self.previous_score {
87            Some(previous) if self.score - previous > 5.0 => "improving".to_string(),
88            Some(previous) if self.score - previous < -5.0 => "degrading".to_string(),
89            _ => "stable".to_string(),
90        };
91        self.updated_at_secs = reward_now();
92    }
93}
94
/// Aggregate trust score for an agent, derived from its dimensions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RewardTrustScore {
    /// DID of the scored agent.
    pub agent_did: String,
    /// Aggregate score on a 0-1000 scale (baseline 500).
    pub total_score: u32,
    /// Tier label derived from `total_score` ("untrusted" through
    /// "verified_partner").
    pub tier: String,
    /// Snapshot of the per-dimension scores behind this total.
    pub dimensions: HashMap<String, RewardDimension>,
    /// Unix timestamp (seconds) of the last recalculation.
    pub calculated_at_secs: u64,
    /// Total score before the last update, if any update occurred.
    pub previous_score: Option<u32>,
    /// Signed difference between the current and previous total score.
    pub score_change: i32,
}
105
106impl RewardTrustScore {
107    pub fn new(agent_did: &str) -> Self {
108        let mut score = Self {
109            agent_did: agent_did.to_string(),
110            total_score: 500,
111            tier: "standard".to_string(),
112            dimensions: HashMap::new(),
113            calculated_at_secs: reward_now(),
114            previous_score: None,
115            score_change: 0,
116        };
117        score.update_tier();
118        score
119    }
120
121    pub fn update(&mut self, new_score: u32, dimensions: HashMap<String, RewardDimension>) {
122        self.previous_score = Some(self.total_score);
123        self.total_score = new_score.min(1000);
124        self.score_change = self.total_score as i32 - self.previous_score.unwrap_or(0) as i32;
125        self.dimensions = dimensions;
126        self.calculated_at_secs = reward_now();
127        self.update_tier();
128    }
129
130    fn update_tier(&mut self) {
131        self.tier = match self.total_score {
132            900..=1000 => "verified_partner",
133            700..=899 => "trusted",
134            500..=699 => "standard",
135            300..=499 => "probationary",
136            _ => "untrusted",
137        }
138        .to_string();
139    }
140}
141
/// Tunables for the reward engine.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RewardConfig {
    /// Intended recalculation cadence in seconds.
    /// NOTE(review): not read anywhere in this file — confirm external use.
    pub update_interval_seconds: u64,
    /// Scores below this value mark the agent as revoked.
    pub revocation_threshold: u32,
    /// Warning level above the revocation threshold.
    /// NOTE(review): not read anywhere in this file — confirm external use.
    pub warning_threshold: u32,
}
148
149impl Default for RewardConfig {
150    fn default() -> Self {
151        Self {
152            update_interval_seconds: 30,
153            revocation_threshold: 200,
154            warning_threshold: 400,
155        }
156    }
157}
158
/// Full per-agent reward state tracked by `RewardEngine`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentRewardState {
    /// DID of the tracked agent.
    pub agent_did: String,
    /// Aggregate trust score derived from `dimensions`.
    pub trust_score: RewardTrustScore,
    /// Live per-dimension scores keyed by dimension name.
    pub dimensions: HashMap<String, RewardDimension>,
    /// Signals recorded for this agent, in arrival order.
    pub recent_signals: Vec<RewardSignal>,
    /// (unix seconds, total score) pairs, one per recalculation.
    pub score_history: Vec<(u64, u32)>,
    /// Unix timestamp (seconds) of the last recalculation.
    pub last_updated_secs: u64,
    /// Set once the score drops below the revocation threshold.
    pub revoked: bool,
    /// Human-readable reason recorded when `revoked` is set.
    pub revocation_reason: Option<String>,
}
170
171impl AgentRewardState {
172    pub fn new(agent_did: &str) -> Self {
173        Self {
174            agent_did: agent_did.to_string(),
175            trust_score: RewardTrustScore::new(agent_did),
176            dimensions: HashMap::new(),
177            recent_signals: Vec::new(),
178            score_history: Vec::new(),
179            last_updated_secs: reward_now(),
180            revoked: false,
181            revocation_reason: None,
182        }
183    }
184}
185
/// Turns behavior signals into per-agent trust scores. The interior
/// `Mutex` lets callers record signals through `&self`.
pub struct RewardEngine {
    /// Thresholds and cadence settings.
    config: RewardConfig,
    /// Per-agent state keyed by agent DID.
    agents: Mutex<HashMap<String, AgentRewardState>>,
}
190
191impl RewardEngine {
192    pub fn new(config: Option<RewardConfig>) -> Self {
193        Self {
194            config: config.unwrap_or_default(),
195            agents: Mutex::new(HashMap::new()),
196        }
197    }
198
199    pub fn get_agent_score(&self, agent_did: &str) -> RewardTrustScore {
200        self.get_or_create_state(agent_did).trust_score.clone()
201    }
202
203    pub fn record_signal(
204        &self,
205        agent_did: &str,
206        dimension: DimensionType,
207        value: f64,
208        source: &str,
209        details: Option<&str>,
210    ) {
211        let mut agents = self.agents.lock().unwrap_or_else(|e| e.into_inner());
212        let state = agents
213            .entry(agent_did.to_string())
214            .or_insert_with(|| AgentRewardState::new(agent_did));
215        let signal = RewardSignal {
216            dimension,
217            value: value.clamp(0.0, 1.0),
218            source: source.to_string(),
219            details: details.map(|d| d.to_string()),
220            trace_id: None,
221            timestamp_secs: reward_now(),
222            weight: 1.0,
223        };
224        state.recent_signals.push(signal.clone());
225        state
226            .dimensions
227            .entry(dimension.as_str().to_string())
228            .or_insert_with(|| RewardDimension::new(dimension.as_str()))
229            .add_signal(&signal);
230        self.recalculate_locked(state);
231    }
232
233    pub fn record_policy_compliance(
234        &self,
235        agent_did: &str,
236        compliant: bool,
237        policy_name: Option<&str>,
238    ) {
239        self.record_signal(
240            agent_did,
241            DimensionType::PolicyCompliance,
242            if compliant { 1.0 } else { 0.0 },
243            "policy_engine",
244            policy_name,
245        );
246    }
247
248    pub fn record_resource_usage(
249        &self,
250        agent_did: &str,
251        tokens_used: u32,
252        tokens_budget: u32,
253        compute_ms: u32,
254        compute_budget_ms: u32,
255    ) {
256        let token_efficiency = (tokens_budget as f64 / tokens_used.max(1) as f64).min(1.0);
257        let compute_efficiency = (compute_budget_ms as f64 / compute_ms.max(1) as f64).min(1.0);
258        self.record_signal(
259            agent_did,
260            DimensionType::ResourceEfficiency,
261            (token_efficiency + compute_efficiency) / 2.0,
262            "resource_monitor",
263            None,
264        );
265    }
266
267    pub fn record_output_quality(
268        &self,
269        agent_did: &str,
270        accepted: bool,
271        consumer: &str,
272        reason: Option<&str>,
273    ) {
274        self.record_signal(
275            agent_did,
276            DimensionType::OutputQuality,
277            if accepted { 1.0 } else { 0.0 },
278            &format!("consumer:{consumer}"),
279            reason,
280        );
281    }
282
283    pub fn record_security_event(&self, agent_did: &str, within_boundary: bool, event_type: &str) {
284        self.record_signal(
285            agent_did,
286            DimensionType::SecurityPosture,
287            if within_boundary { 1.0 } else { 0.0 },
288            "security_monitor",
289            Some(event_type),
290        );
291    }
292
293    pub fn record_collaboration(&self, agent_did: &str, handoff_successful: bool, peer_did: &str) {
294        self.record_signal(
295            agent_did,
296            DimensionType::CollaborationHealth,
297            if handoff_successful { 1.0 } else { 0.0 },
298            &format!("collaboration:{peer_did}"),
299            None,
300        );
301    }
302
303    fn get_or_create_state(&self, agent_did: &str) -> AgentRewardState {
304        self.agents
305            .lock()
306            .unwrap_or_else(|e| e.into_inner())
307            .entry(agent_did.to_string())
308            .or_insert_with(|| AgentRewardState::new(agent_did))
309            .clone()
310    }
311
312    fn recalculate_locked(&self, state: &mut AgentRewardState) {
313        let dimension_average = if state.dimensions.is_empty() {
314            50.0
315        } else {
316            state.dimensions.values().map(|dim| dim.score).sum::<f64>()
317                / state.dimensions.len() as f64
318        };
319        let new_score = (dimension_average * 10.0).round() as u32;
320        state
321            .trust_score
322            .update(new_score, state.dimensions.clone());
323        state
324            .score_history
325            .push((reward_now(), state.trust_score.total_score));
326        state.last_updated_secs = reward_now();
327        if state.trust_score.total_score < self.config.revocation_threshold {
328            state.revoked = true;
329            state.revocation_reason = Some("reward score below revocation threshold".to_string());
330        }
331    }
332}
333
/// A negative trust event processed by `NetworkTrustEngine`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrustEvent {
    /// DID of the agent the event is about.
    pub agent_did: String,
    /// Event category, e.g. "policy_violation".
    pub event_type: String,
    /// Severity multiplier: the direct penalty is `severity_weight * 100`,
    /// and each peer penalty is `severity_weight * edge_weight * 30`.
    pub severity_weight: f64,
    /// Unix timestamp (seconds) of the event.
    pub timestamp_secs: u64,
    /// Optional free-form context.
    pub details: Option<String>,
}
342
/// Directed interaction edge between two agents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InteractionEdge {
    /// DID of the initiating agent.
    pub from_did: String,
    /// DID of the receiving agent.
    pub to_did: String,
    /// Number of interactions recorded on this edge.
    pub interaction_count: u32,
}
349
350impl InteractionEdge {
351    pub fn weight(&self) -> f64 {
352        (self.interaction_count as f64 / 100.0).min(1.0)
353    }
354}
355
/// Propagates trust penalties across an interaction graph: an event
/// against one agent also lowers the scores of its direct peers.
pub struct NetworkTrustEngine {
    /// Network trust score per agent DID (baseline 500.0).
    scores: Mutex<HashMap<String, f64>>,
    /// Directed edges keyed by (from_did, to_did).
    edges: Mutex<HashMap<(String, String), InteractionEdge>>,
}
360
361impl NetworkTrustEngine {
362    pub fn new() -> Self {
363        Self {
364            scores: Mutex::new(HashMap::new()),
365            edges: Mutex::new(HashMap::new()),
366        }
367    }
368
369    pub fn get_score(&self, agent_did: &str) -> f64 {
370        *self
371            .scores
372            .lock()
373            .unwrap_or_else(|e| e.into_inner())
374            .get(agent_did)
375            .unwrap_or(&500.0)
376    }
377
378    pub fn record_interaction(&self, from_did: &str, to_did: &str) {
379        let key = (from_did.to_string(), to_did.to_string());
380        let mut edges = self.edges.lock().unwrap_or_else(|e| e.into_inner());
381        edges
382            .entry(key)
383            .and_modify(|edge| edge.interaction_count += 1)
384            .or_insert_with(|| InteractionEdge {
385                from_did: from_did.to_string(),
386                to_did: to_did.to_string(),
387                interaction_count: 1,
388            });
389    }
390
391    pub fn process_trust_event(&self, event: TrustEvent) -> HashMap<String, f64> {
392        let mut deltas = HashMap::new();
393        {
394            let mut scores = self.scores.lock().unwrap_or_else(|e| e.into_inner());
395            let direct = event.severity_weight * 100.0;
396            let entry = scores.entry(event.agent_did.clone()).or_insert(500.0);
397            *entry = (*entry - direct).max(0.0);
398            deltas.insert(event.agent_did.clone(), -direct);
399        }
400        let edges = self.edges.lock().unwrap_or_else(|e| e.into_inner()).clone();
401        for ((from, to), edge) in edges {
402            if from == event.agent_did || to == event.agent_did {
403                let peer = if from == event.agent_did { to } else { from };
404                let propagated = event.severity_weight * edge.weight() * 30.0;
405                let mut scores = self.scores.lock().unwrap_or_else(|e| e.into_inner());
406                let entry = scores.entry(peer.clone()).or_insert(500.0);
407                *entry = (*entry - propagated).max(0.0);
408                deltas.insert(peer, -propagated);
409            }
410        }
411        deltas
412    }
413}
414
impl Default for NetworkTrustEngine {
    /// Equivalent to [`NetworkTrustEngine::new`].
    fn default() -> Self {
        Self::new()
    }
}
420
/// One participant in a reward pool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParticipantInfo {
    /// DID of the participating agent.
    pub agent_did: String,
    /// Trust score used by `TrustWeightedStrategy`.
    pub trust_score: u32,
    /// Delegation depth used by `HierarchicalStrategy` for decay.
    pub delegation_depth: u32,
    /// Contribution weight used by `ContributionWeightedStrategy`;
    /// `None` is treated as 0.0 by that strategy.
    pub contribution_weight: Option<f64>,
}
428
/// A pot of reward to split among a task's participants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RewardPool {
    /// Total amount available for distribution.
    pub total_reward: f64,
    /// Identifier of the task this pool rewards.
    pub task_id: String,
    /// Agents eligible for a share.
    pub participants: Vec<ParticipantInfo>,
}
435
/// One participant's share of a distributed pool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RewardAllocation {
    /// DID of the receiving agent.
    pub agent_did: String,
    /// Absolute amount awarded.
    pub amount: f64,
    /// Share as a percentage of the pool (0.0-100.0).
    pub percentage: f64,
    /// Name of the strategy that produced this allocation.
    pub strategy_used: String,
}
443
/// Outcome of running a reward strategy over a pool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionResult {
    /// Task the distribution applies to.
    pub task_id: String,
    /// Name of the strategy that was applied.
    pub strategy: String,
    /// Per-participant allocations.
    pub allocations: Vec<RewardAllocation>,
    /// Sum actually distributed (0.0 for an empty pool).
    pub total_distributed: f64,
}
451
/// Strategy interface for splitting a task's reward pool among its
/// participants. `Send + Sync` so boxed strategies can be shared.
pub trait RewardStrategy: Send + Sync {
    /// Computes per-participant allocations for `pool`.
    fn distribute(&self, pool: &RewardPool) -> DistributionResult;
}
455
456pub struct EqualSplitStrategy;
457
458impl RewardStrategy for EqualSplitStrategy {
459    fn distribute(&self, pool: &RewardPool) -> DistributionResult {
460        let n = pool.participants.len();
461        if n == 0 {
462            return DistributionResult {
463                task_id: pool.task_id.clone(),
464                strategy: "equal".to_string(),
465                allocations: Vec::new(),
466                total_distributed: 0.0,
467            };
468        }
469        let share = pool.total_reward / n as f64;
470        DistributionResult {
471            task_id: pool.task_id.clone(),
472            strategy: "equal".to_string(),
473            allocations: pool
474                .participants
475                .iter()
476                .map(|participant| RewardAllocation {
477                    agent_did: participant.agent_did.clone(),
478                    amount: share,
479                    percentage: 100.0 / n as f64,
480                    strategy_used: "equal".to_string(),
481                })
482                .collect(),
483            total_distributed: pool.total_reward,
484        }
485    }
486}
487
488pub struct TrustWeightedStrategy;
489
490impl RewardStrategy for TrustWeightedStrategy {
491    fn distribute(&self, pool: &RewardPool) -> DistributionResult {
492        let total_trust = pool
493            .participants
494            .iter()
495            .map(|p| p.trust_score as f64)
496            .sum::<f64>();
497        let allocations = pool
498            .participants
499            .iter()
500            .map(|participant| {
501                let share = if total_trust > 0.0 {
502                    participant.trust_score as f64 / total_trust
503                } else {
504                    0.0
505                };
506                RewardAllocation {
507                    agent_did: participant.agent_did.clone(),
508                    amount: pool.total_reward * share,
509                    percentage: share * 100.0,
510                    strategy_used: "trust_weighted".to_string(),
511                }
512            })
513            .collect::<Vec<_>>();
514        DistributionResult {
515            task_id: pool.task_id.clone(),
516            strategy: "trust_weighted".to_string(),
517            total_distributed: allocations.iter().map(|item| item.amount).sum(),
518            allocations,
519        }
520    }
521}
522
523pub struct HierarchicalStrategy {
524    pub decay_factor: f64,
525}
526
527impl Default for HierarchicalStrategy {
528    fn default() -> Self {
529        Self { decay_factor: 0.7 }
530    }
531}
532
533impl RewardStrategy for HierarchicalStrategy {
534    fn distribute(&self, pool: &RewardPool) -> DistributionResult {
535        let weights = pool
536            .participants
537            .iter()
538            .map(|participant| self.decay_factor.powi(participant.delegation_depth as i32))
539            .collect::<Vec<_>>();
540        let total_weight = weights.iter().sum::<f64>();
541        let allocations = pool
542            .participants
543            .iter()
544            .zip(weights.iter())
545            .map(|(participant, weight)| {
546                let share = if total_weight > 0.0 {
547                    weight / total_weight
548                } else {
549                    0.0
550                };
551                RewardAllocation {
552                    agent_did: participant.agent_did.clone(),
553                    amount: pool.total_reward * share,
554                    percentage: share * 100.0,
555                    strategy_used: "hierarchical".to_string(),
556                }
557            })
558            .collect::<Vec<_>>();
559        DistributionResult {
560            task_id: pool.task_id.clone(),
561            strategy: "hierarchical".to_string(),
562            total_distributed: allocations.iter().map(|item| item.amount).sum(),
563            allocations,
564        }
565    }
566}
567
568pub struct ContributionWeightedStrategy;
569
570impl RewardStrategy for ContributionWeightedStrategy {
571    fn distribute(&self, pool: &RewardPool) -> DistributionResult {
572        let total_weight = pool
573            .participants
574            .iter()
575            .map(|participant| participant.contribution_weight.unwrap_or(0.0))
576            .sum::<f64>();
577        let allocations = pool
578            .participants
579            .iter()
580            .map(|participant| {
581                let weight = participant.contribution_weight.unwrap_or(0.0);
582                let share = if total_weight > 0.0 {
583                    weight / total_weight
584                } else {
585                    0.0
586                };
587                RewardAllocation {
588                    agent_did: participant.agent_did.clone(),
589                    amount: pool.total_reward * share,
590                    percentage: share * 100.0,
591                    strategy_used: "contribution".to_string(),
592                }
593            })
594            .collect::<Vec<_>>();
595        DistributionResult {
596            task_id: pool.task_id.clone(),
597            strategy: "contribution".to_string(),
598            total_distributed: allocations.iter().map(|item| item.amount).sum(),
599            allocations,
600        }
601    }
602}
603
604pub struct RewardDistributor {
605    default_strategy: String,
606    strategies: HashMap<String, Box<dyn RewardStrategy>>,
607}
608
609impl RewardDistributor {
610    pub fn new(default_strategy: Option<&str>) -> Self {
611        let mut strategies: HashMap<String, Box<dyn RewardStrategy>> = HashMap::new();
612        strategies.insert("equal".to_string(), Box::new(EqualSplitStrategy));
613        strategies.insert(
614            "trust_weighted".to_string(),
615            Box::new(TrustWeightedStrategy),
616        );
617        strategies.insert(
618            "hierarchical".to_string(),
619            Box::new(HierarchicalStrategy::default()),
620        );
621        strategies.insert(
622            "contribution".to_string(),
623            Box::new(ContributionWeightedStrategy),
624        );
625        Self {
626            default_strategy: default_strategy.unwrap_or("trust_weighted").to_string(),
627            strategies,
628        }
629    }
630
631    pub fn register_strategy(&mut self, name: &str, strategy: Box<dyn RewardStrategy>) {
632        self.strategies.insert(name.to_string(), strategy);
633    }
634
635    pub fn distribute(
636        &self,
637        pool: &RewardPool,
638        strategy_name: Option<&str>,
639    ) -> Result<DistributionResult, String> {
640        let strategy = self
641            .strategies
642            .get(strategy_name.unwrap_or(&self.default_strategy))
643            .ok_or_else(|| "unknown reward strategy".to_string())?;
644        Ok(strategy.distribute(pool))
645    }
646}
647
#[cfg(test)]
mod tests {
    use super::*;

    // Recording signals for two different dimensions should materialize
    // both dimension entries in the returned score snapshot.
    #[test]
    fn reward_engine_tracks_dimensions() {
        let engine = RewardEngine::new(None);
        engine.record_policy_compliance("did:mesh:agent", true, Some("baseline"));
        engine.record_security_event("did:mesh:agent", false, "boundary breach");
        let score = engine.get_agent_score("did:mesh:agent");
        assert!(score.dimensions.contains_key("policy_compliance"));
        assert!(score.dimensions.contains_key("security_posture"));
    }

    // A trust event against "a" should also penalize its peer "b"
    // through the recorded interaction edge.
    #[test]
    fn network_trust_engine_propagates() {
        let engine = NetworkTrustEngine::new();
        engine.record_interaction("a", "b");
        let deltas = engine.process_trust_event(TrustEvent {
            agent_did: "a".into(),
            event_type: "policy_violation".into(),
            severity_weight: 0.8,
            timestamp_secs: reward_now(),
            details: None,
        });
        assert!(deltas.contains_key("a"));
        assert!(deltas.contains_key("b"));
    }

    // With trust scores 800 vs 200, the trust-weighted strategy should
    // award the higher-trust agent the larger amount.
    #[test]
    fn reward_distributor_supports_trust_weighted() {
        let distributor = RewardDistributor::new(None);
        let result = distributor
            .distribute(
                &RewardPool {
                    total_reward: 100.0,
                    task_id: "task-1".into(),
                    participants: vec![
                        ParticipantInfo {
                            agent_did: "a".into(),
                            trust_score: 800,
                            delegation_depth: 0,
                            contribution_weight: None,
                        },
                        ParticipantInfo {
                            agent_did: "b".into(),
                            trust_score: 200,
                            delegation_depth: 1,
                            contribution_weight: None,
                        },
                    ],
                },
                Some("trust_weighted"),
            )
            .unwrap();
        assert_eq!(result.allocations.len(), 2);
        assert!(result.allocations[0].amount > result.allocations[1].amount);
    }
}