//! Learning module for HOPE Agents
//!
//! This module provides reinforcement learning capabilities for agents including:
//! - Q-Learning
//! - SARSA
//! - Expected SARSA
//! - Temporal Difference learning
//!
//! ## Overview
//!
//! The learning engine allows agents to learn from experience and improve their
//! decision-making over time using various reinforcement learning algorithms.
//!
//! ## Basic Example
//!
//! ```rust
//! use hope_agents::{Agent, SimpleAgent, Observation, LearningConfig, LearningAlgorithm};
//!
//! // Create an agent with learning enabled
//! let mut agent = SimpleAgent::new("learning_agent");
//!
//! // Configure learning
//! let config = LearningConfig {
//!     learning_rate: 0.1,
//!     discount_factor: 0.99,
//!     algorithm: LearningAlgorithm::QLearning,
//!     ..Default::default()
//! };
//! agent.enable_learning(config);
//!
//! // Simulate learning loop
//! for _ in 0..10 {
//!     // Observe environment
//!     let obs = Observation::sensor("temperature", 25.0);
//!     agent.observe(obs.clone());
//!
//!     // Decide action
//!     let action = agent.decide();
//!
//!     // Execute action
//!     let result = agent.execute(action.clone());
//!
//!     // Learn from result
//!     agent.learn(&obs, &action, &result);
//! }
//!
//! // Check learning progress
//! let engine = agent.learning_engine().unwrap();
//! println!("Total updates: {}", engine.total_updates());
//! println!("State-action pairs learned: {}", engine.state_action_count());
//! ```
//!
//! ## Advanced Example: Direct Learning Engine Usage
//!
//! ```rust
//! use hope_agents::learning::{
//!     LearningEngine, LearningConfig, LearningAlgorithm,
//!     StateId, ActionId, Experience
//! };
//!
//! // Create learning engine
//! let mut engine = LearningEngine::new(LearningConfig {
//!     learning_rate: 0.1,
//!     discount_factor: 0.99,
//!     algorithm: LearningAlgorithm::QLearning,
//!     epsilon: 0.1, // 10% exploration
//!     ..Default::default()
//! });
//!
//! // Define states and actions
//! let state1 = StateId::from_string("state_1".to_string());
//! let state2 = StateId::from_string("state_2".to_string());
//! let action_a = ActionId::from_string("action_a".to_string());
//! let action_b = ActionId::from_string("action_b".to_string());
//! let actions = vec![action_a.clone(), action_b.clone()];
//!
//! // Update Q-values
//! engine.update_q_learning(&state1, &action_a, 1.0, &state2, &actions);
//!
//! // Get best action
//! let best = engine.get_best_action(&state1, &actions);
//!
//! // Use epsilon-greedy exploration
//! let action = engine.get_action_epsilon_greedy(&state1, &actions);
//!
//! // Experience replay
//! let exp = Experience::new(state1.clone(), action_a.clone(), 1.0, state2.clone(), false);
//! engine.add_experience(exp);
//! engine.replay_batch(32, &actions);
//! ```
pub use ;
pub use ;