// strange_loop/nano_agent/optimization.rs

use std::arch::x86_64::*;
use std::mem;

/// Heap-backed `f32` buffer whose length is padded up to a multiple of 16
/// so SIMD kernels can process it in full 8-lane chunks.
///
/// NOTE(review): `#[repr(align(64))]` aligns the *struct itself* (the `Vec`
/// header), not the heap allocation the `Vec` points to — the element data
/// is only guaranteed 4-byte aligned. Aligned AVX loads through `as_ptr()`
/// are therefore not sound; kernels must use unaligned intrinsics.
#[repr(align(64))] pub struct AlignedVector {
    // Zero-initialized storage; len == padded capacity after `new`.
    data: Vec<f32>,
    // Padded capacity (multiple of 16), cached at construction.
    capacity: usize,
}
12
13impl AlignedVector {
14 pub fn new(capacity: usize) -> Self {
16 let aligned_capacity = (capacity + 15) & !15; let mut data = Vec::with_capacity(aligned_capacity);
18 data.resize(aligned_capacity, 0.0);
19
20 Self {
21 data,
22 capacity: aligned_capacity,
23 }
24 }
25
26 pub fn as_ptr(&self) -> *const f32 {
28 self.data.as_ptr()
29 }
30
31 pub fn as_mut_ptr(&mut self) -> *mut f32 {
33 self.data.as_mut_ptr()
34 }
35
36 pub fn len(&self) -> usize {
38 self.data.len()
39 }
40
41 pub fn is_empty(&self) -> bool {
43 self.data.is_empty()
44 }
45
46 #[target_feature(enable = "avx2")]
48 pub unsafe fn simd_add(&mut self, other: &AlignedVector) -> Result<(), &'static str> {
49 if self.len() != other.len() {
50 return Err("Vector lengths must match");
51 }
52
53 let len = self.len();
54 let chunks = len / 8; let self_ptr = self.as_mut_ptr();
57 let other_ptr = other.as_ptr();
58
59 for i in 0..chunks {
61 let offset = i * 8;
62
63 let a = _mm256_load_ps(self_ptr.add(offset));
65 let b = _mm256_load_ps(other_ptr.add(offset));
66
67 let result = _mm256_add_ps(a, b);
69
70 _mm256_store_ps(self_ptr.add(offset), result);
72 }
73
74 for i in (chunks * 8)..len {
76 *self_ptr.add(i) += *other_ptr.add(i);
77 }
78
79 Ok(())
80 }
81
82 #[target_feature(enable = "avx2")]
84 pub unsafe fn simd_dot(&self, other: &AlignedVector) -> Result<f32, &'static str> {
85 if self.len() != other.len() {
86 return Err("Vector lengths must match");
87 }
88
89 let len = self.len();
90 let chunks = len / 8;
91
92 let self_ptr = self.as_ptr();
93 let other_ptr = other.as_ptr();
94
95 let mut sum_vec = _mm256_setzero_ps();
97
98 for i in 0..chunks {
100 let offset = i * 8;
101
102 let a = _mm256_load_ps(self_ptr.add(offset));
103 let b = _mm256_load_ps(other_ptr.add(offset));
104
105 let product = _mm256_mul_ps(a, b);
107 sum_vec = _mm256_add_ps(sum_vec, product);
108 }
109
110 let mut result_array = [0.0f32; 8];
112 _mm256_store_ps(result_array.as_mut_ptr(), sum_vec);
113 let mut dot_product: f32 = result_array.iter().sum();
114
115 for i in (chunks * 8)..len {
117 dot_product += *self_ptr.add(i) * *other_ptr.add(i);
118 }
119
120 Ok(dot_product)
121 }
122
123 #[target_feature(enable = "avx2")]
125 pub unsafe fn simd_scale(&mut self, scalar: f32) {
126 let len = self.len();
127 let chunks = len / 8;
128
129 let self_ptr = self.as_mut_ptr();
130 let scalar_vec = _mm256_set1_ps(scalar); for i in 0..chunks {
134 let offset = i * 8;
135
136 let a = _mm256_load_ps(self_ptr.add(offset));
137 let result = _mm256_mul_ps(a, scalar_vec);
138 _mm256_store_ps(self_ptr.add(offset), result);
139 }
140
141 for i in (chunks * 8)..len {
143 *self_ptr.add(i) *= scalar;
144 }
145 }
146}
147
/// Per-agent simulation state (kinematics, energy, parameters, bookkeeping).
///
/// NOTE(review): the struct uses the default (`Rust`) field layout, so the
/// compiler is free to reorder fields; `_padding1`/`_padding2` are not
/// guaranteed to sit between the fields they presumably separate (looks
/// like intended cache-line isolation) — confirm with `mem::offset_of!`
/// if the exact layout matters.
#[repr(align(64))]
pub struct AgentState {
    pub position: [f32; 3],
    pub velocity: [f32; 3],
    pub acceleration: [f32; 3],
    pub energy: f32,
    pub active: bool,
    // Manual padding; see layout note above.
    _padding1: [u8; 31],
    // Per-agent tunable parameters in a SIMD-friendly buffer.
    pub parameters: AlignedVector,
    // Timestamp of the last update in nanoseconds (clock source is the
    // caller's responsibility — TODO confirm which clock is intended).
    pub last_update_ns: u128,
    pub performance_score: f32,
    _padding2: [u8; 36],

    // Cold, heap-owning fields.
    pub debug_info: String,
    pub creation_time: std::time::Instant,
}
169
170impl AgentState {
171 pub fn new(param_count: usize) -> Self {
172 Self {
173 position: [0.0; 3],
174 velocity: [0.0; 3],
175 acceleration: [0.0; 3],
176 energy: 1.0,
177 active: true,
178 _padding1: [0; 31],
179 parameters: AlignedVector::new(param_count),
180 last_update_ns: 0,
181 performance_score: 0.0,
182 _padding2: [0; 36],
183 debug_info: String::new(),
184 creation_time: std::time::Instant::now(),
185 }
186 }
187
188 pub fn simd_update(&mut self, dt: f32) {
190 unsafe {
191 let pos_ptr = self.position.as_mut_ptr();
193 let vel_ptr = self.velocity.as_ptr();
194
195 let mut pos_padded = [0.0f32; 4];
197 let mut vel_padded = [0.0f32; 4];
198
199 pos_padded[..3].copy_from_slice(&self.position);
200 vel_padded[..3].copy_from_slice(&self.velocity);
201
202 let pos_vec = _mm_load_ps(pos_padded.as_ptr());
203 let vel_vec = _mm_load_ps(vel_padded.as_ptr());
204 let dt_vec = _mm_set1_ps(dt);
205
206 let vel_scaled = _mm_mul_ps(vel_vec, dt_vec);
208 let new_pos = _mm_add_ps(pos_vec, vel_scaled);
209
210 _mm_store_ps(pos_padded.as_mut_ptr(), new_pos);
212 self.position.copy_from_slice(&pos_padded[..3]);
213 }
214 }
215}
216
/// Structure-of-arrays storage for batched agent kinematics, so whole-
/// population updates can run through AVX in long contiguous sweeps.
pub struct BatchProcessor {
    // Flattened [x0, y0, z0, x1, y1, z1, ...]; sized max_agents * 3 (padded).
    positions: AlignedVector,
    velocities: AlignedVector,
    accelerations: AlignedVector,
    // Number of live agents; only the first agent_count * 3 floats are used.
    agent_count: usize,
}
224
225impl BatchProcessor {
226 pub fn new(max_agents: usize) -> Self {
227 Self {
228 positions: AlignedVector::new(max_agents * 3),
229 velocities: AlignedVector::new(max_agents * 3),
230 accelerations: AlignedVector::new(max_agents * 3),
231 agent_count: 0,
232 }
233 }
234
235 #[target_feature(enable = "avx2")]
237 pub unsafe fn batch_update_positions(&mut self, dt: f32) {
238 let len = self.agent_count * 3;
241 let chunks = len / 8;
242
243 let pos_ptr = self.positions.as_mut_ptr();
244 let vel_ptr = self.velocities.as_ptr();
245 let acc_ptr = self.accelerations.as_ptr();
246
247 let dt_vec = _mm256_set1_ps(dt);
248 let dt2_vec = _mm256_set1_ps(dt * dt * 0.5);
249
250 for i in 0..chunks {
251 let offset = i * 8;
252
253 let pos = _mm256_load_ps(pos_ptr.add(offset));
254 let vel = _mm256_load_ps(vel_ptr.add(offset));
255 let acc = _mm256_load_ps(acc_ptr.add(offset));
256
257 let vel_term = _mm256_mul_ps(vel, dt_vec);
259
260 let acc_term = _mm256_mul_ps(acc, dt2_vec);
262
263 let result = _mm256_add_ps(pos, _mm256_add_ps(vel_term, acc_term));
265
266 _mm256_store_ps(pos_ptr.add(offset), result);
267 }
268
269 for i in (chunks * 8)..len {
271 *pos_ptr.add(i) += *vel_ptr.add(i) * dt + 0.5 * *acc_ptr.add(i) * dt * dt;
272 }
273 }
274
275 #[target_feature(enable = "avx2")]
277 pub unsafe fn calculate_forces(&mut self) -> AlignedVector {
278 let mut forces = AlignedVector::new(self.agent_count * 3);
279
280 forces
284 }
285}
286
/// Fixed-capacity pool of pre-constructed `AgentState`s, handed out by index
/// so "allocation" never touches the heap after construction.
pub struct AgentMemoryPool {
    // All states are built up front; indices into this Vec are the handles.
    states: Vec<AgentState>,
    // LIFO stack of indices not currently handed out.
    free_indices: Vec<usize>,
    // Total number of slots (== states.len()).
    capacity: usize,
}
293
294impl AgentMemoryPool {
295 pub fn new(capacity: usize) -> Self {
296 let mut states = Vec::with_capacity(capacity);
297 let mut free_indices = Vec::with_capacity(capacity);
298
299 for i in 0..capacity {
300 states.push(AgentState::new(16)); free_indices.push(i);
302 }
303
304 Self {
305 states,
306 free_indices,
307 capacity,
308 }
309 }
310
311 pub fn allocate_agent(&mut self) -> Option<usize> {
312 self.free_indices.pop()
313 }
314
315 pub fn deallocate_agent(&mut self, index: usize) {
316 if index < self.capacity {
317 self.free_indices.push(index);
318 }
319 }
320
321 pub fn get_state(&self, index: usize) -> Option<&AgentState> {
322 self.states.get(index)
323 }
324
325 pub fn get_state_mut(&mut self, index: usize) -> Option<&mut AgentState> {
326 self.states.get_mut(index)
327 }
328}
329
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_aligned_vector_creation() {
        let vec = AlignedVector::new(100);
        // 100 rounded up to the next multiple of 16.
        assert_eq!(vec.len(), 112);
        // Vec<f32> heap data only guarantees element (4-byte) alignment;
        // the previous `% 64 == 0` assertion tested a property the
        // allocator never promised and could fail spuriously.
        assert_eq!(vec.as_ptr() as usize % std::mem::align_of::<f32>(), 0);
    }

    #[test]
    fn test_simd_operations() {
        // The SIMD kernels are AVX2-gated `unsafe fn`s; skip on CPUs
        // without AVX2 instead of executing illegal instructions.
        if !std::is_x86_64_feature_detected!("avx2") {
            return;
        }

        let mut a = AlignedVector::new(16);
        let mut b = AlignedVector::new(16);

        for i in 0..16 {
            unsafe {
                *a.as_mut_ptr().add(i) = i as f32;
                *b.as_mut_ptr().add(i) = (i * 2) as f32;
            }
        }

        unsafe {
            a.simd_add(&b).unwrap();
            // a[i] = i + 2i = 3i
            assert_eq!(*a.as_ptr().add(5), 15.0);

            // dot = sum(3i * 2i) for i in 0..16 = 6 * 1240 = 7440
            let dot = a.simd_dot(&b).unwrap();
            assert_eq!(dot, 7440.0);

            a.simd_scale(2.0);
            assert_eq!(*a.as_ptr().add(5), 30.0);
        }
    }

    #[test]
    fn test_agent_state_alignment() {
        // Unlike the heap data above, `#[repr(align(64))]` does guarantee
        // that the AgentState value itself is 64-byte aligned.
        let state = AgentState::new(16);
        let ptr = &state as *const AgentState as usize;
        assert_eq!(ptr % 64, 0);
    }

    #[test]
    fn test_memory_pool() {
        let mut pool = AgentMemoryPool::new(10);

        let agent1 = pool.allocate_agent().unwrap();
        let agent2 = pool.allocate_agent().unwrap();

        assert_ne!(agent1, agent2);

        // A freed index is recycled LIFO.
        pool.deallocate_agent(agent1);
        let agent3 = pool.allocate_agent().unwrap();
        assert_eq!(agent1, agent3);
    }
}