scirs2-integrate 0.4.1

Numerical integration module for SciRS2 (scirs2-integrate)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
#version 430

// Neural Network + Reinforcement Learning Adaptive Step Size Control.
// A DQN-style agent (Q-learning with neural function approximation) selects a
// step-size multiplier per ODE system. One invocation handles one system;
// a workgroup of 32 invocations cooperates through shared memory below.

layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;

// State representation for RL agent (64 features per ODE system)
layout(std430, binding = 0) buffer StateBuffer {
    float state_features[];  // [error_history_8, step_history_8, jacobian_eigenvalues_8, problem_char_4, performance_metrics_6, temporal_features_8, accuracy_features_4, convergence_features_4, memory_features_4, gpu_utilization_features_4, cache_features_4]
};

// Action space: discrete step size multipliers (32 possible actions)
layout(std430, binding = 1) buffer ActionBuffer {
    float action_values[];   // Q-values for each action [0.1, 0.2, 0.3, ..., 3.2]
};

// Reward signal based on performance metrics
layout(std430, binding = 2) buffer RewardBuffer {
    float rewards[];         // Reward for previous action
};

// Deep Q-Network weights (3-layer network: 64->128->64->32)
layout(std430, binding = 3) buffer DQNWeightsBuffer {
    float dqn_weights[];     // Pre-trained DQN weights
};

// Neural network hidden states.
// NOTE(review): std430 permits only the LAST member of a buffer block to be a
// runtime-sized array; two unsized arrays here will not compile on conforming
// GLSL drivers. Split into two bindings or give hidden_layer1 an explicit
// size. (This block is currently unused — the forward pass works in shared
// memory — so removing or fixing it should be safe; confirm against host code.)
layout(std430, binding = 4) buffer HiddenStatesBuffer {
    float hidden_layer1[];   // 128 neurons
    float hidden_layer2[];   // 64 neurons
};

// Experience replay buffer for training.
// Stride used by the code is 132 floats per transition:
// [state_64 | action_1 | reward_1 | next_state_64 | done_1 | priority_weight_1]
layout(std430, binding = 5) buffer ExperienceBuffer {
    float experience_memory[]; // [state_t, action_t, reward_t, state_t+1, done] * buffer_size
};

// Target network weights for stable learning
layout(std430, binding = 6) buffer TargetNetworkBuffer {
    float target_weights[];  // Slowly updated target network
};

// Priority replay weights (for prioritized experience replay)
layout(std430, binding = 7) buffer PriorityBuffer {
    float priorities[];      // TD-error based priorities
};

// Output: optimal step size for each ODE system
layout(std430, binding = 8) buffer OptimalStepBuffer {
    float optimal_steps[];
};

// Training metadata
layout(std430, binding = 9) buffer TrainingMetaBuffer {
    float training_data[];   // [episode_count, total_reward, epsilon, learning_rate, target_update_counter]
};

// Hyperparameters
uniform float epsilon;              // Exploration rate for epsilon-greedy
uniform float learning_rate;        // Learning rate for neural network
uniform float discount_factor;      // Gamma for future reward discounting
uniform float target_update_freq;   // How often to update target network
uniform int batch_size;            // Mini-batch size for training
uniform int replay_buffer_size;    // Size of experience replay buffer
uniform int num_systems;           // Number of ODE systems
uniform bool training_mode;        // Whether to update weights during inference
uniform float tau;                 // Soft update parameter for target network
uniform float priority_alpha;      // Prioritized replay exponent
uniform float priority_beta;       // Importance sampling correction

// Shared memory for efficient neural network computation.
// Layout is one row per invocation: row `tid` spans [tid*width, tid*width+width).
shared float shared_features[32 * 64];    // Input features
shared float shared_hidden1[32 * 128];    // Hidden layer 1
shared float shared_hidden2[32 * 64];     // Hidden layer 2
shared float shared_q_values[32 * 32];    // Q-values output

// Advanced activation functions
// Leaky ReLU: identity for positive inputs, fixed 0.01 slope for negatives.
float leaky_relu(float x) {
    if (x > 0.0) {
        return x;
    }
    return 0.01 * x;
}

// Swish (SiLU) activation: x * sigmoid(x), written as x / (1 + e^-x).
float swish(float x) {
    float denominator = 1.0 + exp(-x);
    return x / denominator;
}

// Mish activation: x * tanh(softplus(x)), where softplus(x) = ln(1 + e^x).
float mish(float x) {
    float softplus = log(1.0 + exp(x));
    return x * tanh(softplus);
}

// Batch normalization: scale-and-shift of the standardized input.
// The 1e-8 term keeps the denominator away from zero for tiny variances.
float batch_norm(float x, float mean, float variance, float gamma, float beta) {
    float standardized = (x - mean) / sqrt(variance + 1e-8);
    return gamma * standardized + beta;
}

// Huber loss: quadratic inside |delta| <= threshold, linear outside.
// The linear branch is offset so the two pieces join continuously.
float huber_loss(float delta, float threshold) {
    float abs_delta = abs(delta);
    if (abs_delta > threshold) {
        return threshold * (abs_delta - 0.5 * threshold);
    }
    return 0.5 * delta * delta;
}

// NoisyNet-style linear unit: perturbs the weight by noise_weight * noise
// before applying it, providing learned, state-independent exploration.
float noisy_linear(float input, float weight, float noise_weight, float noise) {
    float perturbed_weight = weight + noise_weight * noise;
    return input * perturbed_weight;
}

// Dueling head combination: Q(s,a) = V(s) + A(s,a) - mean_a A(s,a).
// Subtracting the mean advantage keeps V and A identifiable.
float dueling_q_value(float state_value, float advantage, float mean_advantage) {
    float combined = state_value + advantage;
    return combined - mean_advantage;
}

// n-step discounted return: sum of gamma^k * reward[start_idx + k].
// Out-of-range indices are skipped; note the discount only advances when a
// reward is actually accumulated (matching the original accounting).
float multi_step_return(int start_idx, int n_steps, float gamma) {
    float accumulated = 0.0;
    float discount = 1.0;

    for (int step = 0; step < n_steps; step++) {
        int idx = start_idx + step;
        if (idx >= replay_buffer_size) {
            continue;
        }
        accumulated += discount * rewards[idx];
        discount *= gamma;
    }

    return accumulated;
}

// Scaled-exponential attention: weight `value` by exp(query . key / scale).
// (Unnormalized — softmax normalization would happen across keys elsewhere.)
float attention_weight(float query, float key, float value, float scale) {
    float scaled_score = (query * key) / scale;
    return value * exp(scaled_score);
}

// Forward pass through the deep Q-network for one ODE system.
// Layers: 64 -> 128 (Mish, simplified batch norm) -> 64 (Swish) -> 32 Q-values
// with a dueling-style combination. Each invocation strides over neurons
// (h += 32) but writes into its OWN row of the shared arrays (tid * width + h).
//
// NOTE(review): those two schemes conflict. Thread `tid` fills only entries
// h = tid, tid+32, tid+64, tid+96 of its shared_hidden1 row, yet layer 2 reads
// all 128 entries of that row — most are never written. Either each thread
// should compute every neuron of its own row, or rows must be shared across
// threads with different indexing. Confirm intent before trusting outputs.
//
// NOTE(review): the dueling step reads shared_q_values to form mean_advantage
// BEFORE this pass has written them (values are stale or uninitialized); a
// correct dueling head needs a separate pass/barrier after advantages exist.
//
// NOTE(review): main() returns early for system_idx >= num_systems, so the
// barrier() calls below execute under non-uniform control flow — undefined
// behavior per the GLSL spec.
void forward_pass_dqn(uint system_idx) {
    uint tid = gl_LocalInvocationID.x;
    uint feature_offset = system_idx * 64;   // start of this system's 64 features
    uint weight_offset = 0;                  // running cursor into dqn_weights
    
    // Layer 1: 64 -> 128 with Mish activation and batch normalization
    barrier();
    for (uint h = tid; h < 128; h += 32) {
        float sum = 0.0;
        for (uint i = 0; i < 64; i++) {
            float weight = dqn_weights[weight_offset + h * 64 + i];
            float feature = state_features[feature_offset + i];
            sum += weight * feature;
        }
        
        // Add bias and apply batch normalization
        // (assumes biases are packed contiguously after the 64x128 weight matrix)
        float bias = dqn_weights[weight_offset + 64 * 128 + h];
        float normalized = batch_norm(sum + bias, 0.0, 1.0, 1.0, 0.0); // Simplified batch norm
        shared_hidden1[tid * 128 + h] = mish(normalized);
    }
    weight_offset += 64 * 128 + 128; // Move to next layer weights
    
    barrier();
    
    // Layer 2: 128 -> 64 with Swish activation
    for (uint h = tid; h < 64; h += 32) {
        float sum = 0.0;
        for (uint i = 0; i < 128; i++) {
            float weight = dqn_weights[weight_offset + h * 128 + i];
            float hidden1_val = shared_hidden1[tid * 128 + i];
            sum += weight * hidden1_val;
        }
        
        float bias = dqn_weights[weight_offset + 128 * 64 + h];
        shared_hidden2[tid * 64 + h] = swish(sum + bias);
    }
    weight_offset += 128 * 64 + 64;
    
    barrier();
    
    // Output layer: 64 -> 32 (Q-values) with dueling architecture.
    // With local_size_x = 32, `a` takes the single value tid per thread.
    for (uint a = tid; a < 32; a += 32) {
        float advantage_sum = 0.0;
        float state_value = 0.0;
        
        // Calculate advantage stream
        for (uint i = 0; i < 64; i++) {
            float weight = dqn_weights[weight_offset + a * 64 + i];
            float hidden2_val = shared_hidden2[tid * 64 + i];
            advantage_sum += weight * hidden2_val;
        }
        
        // Calculate state value stream.
        // NOTE(review): this index is independent of `a` and points just past
        // the 32x64 advantage weights — presumably a dedicated V(s) weight
        // vector; the total-weight formula in soft_update_target_network does
        // not account for it. Verify the weight layout against the host code.
        for (uint i = 0; i < 64; i++) {
            float weight = dqn_weights[weight_offset + 32 * 64 + i]; // State value weights
            float hidden2_val = shared_hidden2[tid * 64 + i];
            state_value += weight * hidden2_val;
        }
        
        // Calculate mean advantage for dueling
        // NOTE(review): reads shared_q_values before this pass writes them.
        float mean_advantage = 0.0;
        for (uint j = 0; j < 32; j++) {
            mean_advantage += shared_q_values[tid * 32 + j];
        }
        mean_advantage /= 32.0;
        
        // Dueling Q-value
        shared_q_values[tid * 32 + a] = dueling_q_value(state_value, advantage_sum, mean_advantage);
    }
    
    barrier();
}

// Prioritized experience replay sampling: roulette-wheel selection over
// priority^alpha, as in prioritized experience replay (Schaul et al.).
// Returns the index of the sampled transition.
//
// Fix vs. original: the normalization divided by total_priority
// unconditionally, producing NaN cumulative weights when every priority is
// zero (e.g. before the buffer is first populated) and silently falling
// through to the last slot via NaN comparisons. Zero/NaN totals now short-
// circuit to the deterministic fallback instead.
uint sample_priority_experience(uint system_idx) {
    uint buffer_len = uint(replay_buffer_size);
    uint fallback = buffer_len - 1u;

    // Normalization constant: sum of alpha-scaled priorities.
    float total_priority = 0.0;
    for (uint i = 0u; i < buffer_len; i++) {
        total_priority += pow(priorities[i], priority_alpha);
    }

    // Guard against division by zero / NaN (NaN comparisons are false,
    // so this branch also catches a NaN total).
    if (!(total_priority > 0.0)) {
        return fallback;
    }

    // Deterministic pseudo-random draw in [0, 1) (hash-style sine fract).
    float random_val = fract(sin(float(system_idx) * 12.9898 + gl_GlobalInvocationID.x * 78.233) * 43758.5453);

    float cumulative_priority = 0.0;
    for (uint i = 0u; i < buffer_len; i++) {
        cumulative_priority += pow(priorities[i], priority_alpha) / total_priority;
        if (random_val <= cumulative_priority) {
            return i;
        }
    }

    return fallback; // Round-off can leave the cumulative sum just below 1.
}

// Record the current transition in the experience replay buffer.
// Each transition occupies 132 floats:
//   [0..63] state, [64] action, [65] reward, [66..129] next state,
//   [130] done flag, [131] priority weight.
// The next-state slice is left for a later call; the transition is marked
// non-terminal. Note the slot is keyed by system index, so each system
// reuses one fixed slot rather than advancing a ring-buffer cursor.
void update_experience_replay(uint system_idx, float reward, uint action_taken) {
    uint slot = system_idx % replay_buffer_size;
    uint base = slot * 132;
    uint state_base = system_idx * 64;

    // Copy the 64-feature state snapshot.
    for (uint f = 0; f < 64; f++) {
        experience_memory[base + f] = state_features[state_base + f];
    }

    experience_memory[base + 64] = float(action_taken);
    experience_memory[base + 65] = reward;

    // Mark as not done; the terminal flag lives at offset 130.
    experience_memory[base + 130] = 0.0;
}

// Temporal-difference error |Q(s,a) - (r + gamma * max_a' Q_target(s',a'))|
// used to refresh replay priorities.
// This is an acknowledged placeholder: both Q estimates are fixed at zero
// (a full implementation would run the online and target networks), so the
// result currently reduces to |r| scaled by nothing beyond the reward term.
float calculate_td_error(uint experience_idx) {
    uint base = experience_idx * 132;

    // Placeholder network outputs.
    float current_q = 0.0;  // would come from the online network
    float next_max_q = 0.0; // would come from the target network on s'

    float reward = experience_memory[base + 65];
    float target_q = reward + discount_factor * next_max_q;

    return abs(current_q - target_q);
}

// Polyak (soft) update of the target network:
//   target <- tau * online + (1 - tau) * target
// The 32 invocations stride over the flat weight array cooperatively.
void soft_update_target_network() {
    uint lane = gl_LocalInvocationID.x;
    // Per-layer sizes: (64x128 + 128) + (128x64 + 64) + (64x32 + 32).
    uint total_weights = 64 * 128 + 128 + 128 * 64 + 64 + 64 * 32 + 32;

    for (uint w = lane; w < total_weights; w += 32) {
        float blended = tau * dqn_weights[w] + (1.0 - tau) * target_weights[w];
        target_weights[w] = blended;
    }
}

// Derive a per-system learning rate from problem-characteristic features:
// harder (complex / volatile) problems get a smaller rate.
// Feature slots: 60 = complexity, 61 = error volatility, 62 = convergence.
float meta_learning_adaptation(uint system_idx) {
    uint base = system_idx * 64;
    float problem_complexity = state_features[base + 60];
    float error_volatility = state_features[base + 61];
    float convergence_rate = state_features[base + 62]; // read for parity; currently unused

    // Multiplicative damping of the base rate (x * 1.0 is exact, so the
    // no-op branches leave the value bit-identical).
    float adaptive_lr = learning_rate;
    adaptive_lr *= (problem_complexity > 0.8) ? 0.5 : 1.0; // halve for complex problems
    adaptive_lr *= (error_volatility > 0.7) ? 0.7 : 1.0;   // damp for volatile errors

    return adaptive_lr;
}

// Epsilon-greedy exploration schedule: exponential decay of the base
// epsilon uniform per episode, floored so some exploration always remains.
float adaptive_epsilon(uint episode_count) {
    const float min_epsilon = 0.01;
    const float decay_rate = 0.995;
    float decayed = epsilon * pow(decay_rate, float(episode_count));
    return (decayed > min_epsilon) ? decayed : min_epsilon;
}

// Scalar reward aggregating five performance objectives with fixed weights.
// Metrics live in feature slots 48..52 of the system's state vector.
float calculate_multi_objective_reward(uint system_idx) {
    uint base = system_idx * 64;

    float accuracy_reward    = state_features[base + 48]; // accuracy achievement
    float efficiency_reward  = state_features[base + 49]; // computational efficiency
    float stability_reward   = state_features[base + 50]; // numerical stability
    float memory_reward      = state_features[base + 51]; // memory efficiency
    float convergence_reward = state_features[base + 52]; // convergence speed

    // Fixed weighting (sums to 1.0): accuracy dominates, convergence least.
    // Accumulated left-to-right in the same order as the original sum.
    float reward = 0.3 * accuracy_reward;
    reward += 0.25 * efficiency_reward;
    reward += 0.2 * stability_reward;
    reward += 0.15 * memory_reward;
    reward += 0.1 * convergence_reward;

    return reward;
}

// Entry point: one invocation per ODE system. Runs the DQN forward pass,
// selects a step-size action (epsilon-greedy), writes the clamped step to
// optimal_steps, and — in training mode — updates replay/priorities/metadata.
//
// NOTE(review): the early return below makes the subsequent barrier() calls
// (here and inside forward_pass_dqn) execute under non-uniform control flow,
// which is undefined behavior in GLSL. Guard the work instead of returning.
void main() {
    uint system_idx = gl_GlobalInvocationID.x;
    
    if (system_idx >= num_systems) return;
    
    uint tid = gl_LocalInvocationID.x;
    
    // Load state features into shared memory.
    // NOTE(review): shared_features is written here but never read —
    // forward_pass_dqn reads state_features directly. Dead store or a
    // missing optimization; confirm intent.
    for (uint i = 0; i < 64; i++) {
        shared_features[tid * 64 + i] = state_features[system_idx * 64 + i];
    }
    
    barrier();
    
    // Forward pass through DQN to get Q-values
    forward_pass_dqn(system_idx);
    
    // Action selection with advanced epsilon-greedy
    uint episode_count = uint(training_data[0]);
    float current_epsilon = adaptive_epsilon(episode_count);
    
    // Generate random number for exploration (hash-style sine fract)
    float random_val = fract(sin(float(system_idx) * 12.9898 + float(episode_count) * 78.233) * 43758.5453);
    
    uint selected_action = 0;
    if (random_val < current_epsilon) {
        // Exploration: random action
        selected_action = uint(random_val * 32.0) % 32;
    } else {
        // Exploitation: best Q-value.
        // NOTE(review): this scans the whole row tid*32..tid*32+31, but
        // forward_pass_dqn only wrote entry a == tid of that row in this
        // dispatch — the other 31 entries are stale/uninitialized.
        float max_q = shared_q_values[tid * 32 + 0];
        for (uint a = 1; a < 32; a++) {
            if (shared_q_values[tid * 32 + a] > max_q) {
                max_q = shared_q_values[tid * 32 + a];
                selected_action = a;
            }
        }
    }
    
    // Convert action to step size multiplier (action i -> 0.1 * (i + 1))
    float step_multipliers[32] = float[32](
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
        1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
        2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0,
        3.1, 3.2
    );
    
    float current_step = state_features[system_idx * 64 + 15]; // Current step size from state
    float optimal_step = current_step * step_multipliers[selected_action];
    
    // Apply safety constraints relative to the current step
    float min_step = current_step * 0.01;  // Don't reduce by more than 100x
    float max_step = current_step * 10.0;  // Don't increase by more than 10x
    optimal_step = clamp(optimal_step, min_step, max_step);
    
    // Store optimal step size
    optimal_steps[system_idx] = optimal_step;
    
    // Training mode: update network weights and experience replay
    if (training_mode) {
        // Calculate reward based on performance
        float reward = calculate_multi_objective_reward(system_idx);
        
        // Update experience replay buffer
        update_experience_replay(system_idx, reward, selected_action);
        
        // Perform mini-batch training every few steps
        if ((episode_count % 4) == 0) {
            // Sample prioritized experience
            uint sampled_idx = sample_priority_experience(system_idx);
            
            // Calculate TD error for priority update
            float td_error = calculate_td_error(sampled_idx);
            priorities[sampled_idx] = td_error + 1e-6; // Small constant to avoid zero priority
            
            // Soft update target network
            if ((episode_count % uint(target_update_freq)) == 0) {
                soft_update_target_network();
            }
        }
        
        // Update training metadata.
        // NOTE(review): tid == 0 of EVERY workgroup executes this with no
        // atomics — concurrent workgroups race on training_data[0..3], and
        // the += on training_data[1] can lose updates. Consider atomicAdd
        // on uint storage or a single dedicated workgroup for metadata.
        if (tid == 0) {
            training_data[0] = float(episode_count + 1);  // Increment episode count
            training_data[1] += reward;                    // Accumulate total reward
            training_data[2] = current_epsilon;           // Store current epsilon
            training_data[3] = meta_learning_adaptation(system_idx); // Adaptive learning rate
        }
    }
}