chess_vector_engine/utils/
lazy.rs

1use crate::errors::{ChessEngineError, Result};
2use ndarray::Array1;
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, RwLock};
6use std::time::Instant;
7
8/// Lazy-loaded value that is computed on first access
9pub struct Lazy<T> {
10    data: RwLock<Option<T>>,
11    initializer: Box<dyn Fn() -> Result<T> + Send + Sync>,
12}
13
14impl<T> Lazy<T> {
15    /// Create a new lazy value
16    pub fn new<F>(initializer: F) -> Self
17    where
18        F: Fn() -> Result<T> + Send + Sync + 'static,
19    {
20        Self {
21            data: RwLock::new(None),
22            initializer: Box::new(initializer),
23        }
24    }
25
26    /// Get the value, computing it if necessary
27    pub fn get(&self) -> Result<&T> {
28        // First, try to read without blocking
29        {
30            let read_guard = self.data.read().unwrap();
31            if read_guard.is_some() {
32                // Safe because we know it's Some and we hold the read lock
33                let ptr = read_guard.as_ref().unwrap() as *const T;
34                unsafe {
35                    return Ok(&*ptr);
36                }
37            }
38        }
39
40        // Need to initialize
41        let mut write_guard = self.data.write().unwrap();
42
43        // Double-check pattern
44        if write_guard.is_none() {
45            let value = (self.initializer)()?;
46            *write_guard = Some(value);
47        }
48
49        // Safe because we know it's Some and we hold the write lock
50        let ptr = write_guard.as_ref().unwrap() as *const T;
51        unsafe { Ok(&*ptr) }
52    }
53
54    /// Check if the value has been initialized
55    pub fn is_initialized(&self) -> bool {
56        self.data.read().unwrap().is_some()
57    }
58
59    /// Force initialization without returning the value
60    pub fn initialize(&self) -> Result<()> {
61        self.get().map(|_| ())
62    }
63
64    /// Clear the lazy value, forcing re-initialization on next access
65    pub fn clear(&self) {
66        *self.data.write().unwrap() = None;
67    }
68}
69
70/// Lazy collection that loads items on-demand
71pub struct LazyCollection<K, V> {
72    items: RwLock<HashMap<K, V>>,
73    loader: Box<dyn Fn(&K) -> Result<V> + Send + Sync>,
74    cache_size_limit: Option<usize>,
75}
76
77impl<K, V> LazyCollection<K, V>
78where
79    K: Clone + Eq + std::hash::Hash,
80    V: Clone,
81{
82    /// Create a new lazy collection
83    pub fn new<F>(loader: F) -> Self
84    where
85        F: Fn(&K) -> Result<V> + Send + Sync + 'static,
86    {
87        Self {
88            items: RwLock::new(HashMap::new()),
89            loader: Box::new(loader),
90            cache_size_limit: None,
91        }
92    }
93
94    /// Create a lazy collection with a cache size limit
95    pub fn with_cache_limit<F>(loader: F, cache_limit: usize) -> Self
96    where
97        F: Fn(&K) -> Result<V> + Send + Sync + 'static,
98    {
99        Self {
100            items: RwLock::new(HashMap::new()),
101            loader: Box::new(loader),
102            cache_size_limit: Some(cache_limit),
103        }
104    }
105
106    /// Get an item, loading it if necessary
107    pub fn get(&self, key: &K) -> Result<V> {
108        // Try to read from cache first
109        {
110            let read_guard = self.items.read().unwrap();
111            if let Some(value) = read_guard.get(key) {
112                return Ok(value.clone());
113            }
114        }
115
116        // Load the item
117        let value = (self.loader)(key)?;
118
119        // Cache the loaded value
120        {
121            let mut write_guard = self.items.write().unwrap();
122
123            // Check cache size limit
124            if let Some(limit) = self.cache_size_limit {
125                if write_guard.len() >= limit {
126                    // Remove a random item to make space
127                    if let Some(key_to_remove) = write_guard.keys().next().cloned() {
128                        write_guard.remove(&key_to_remove);
129                    }
130                }
131            }
132
133            write_guard.insert(key.clone(), value.clone());
134        }
135
136        Ok(value)
137    }
138
139    /// Check if an item is cached
140    pub fn is_cached(&self, key: &K) -> bool {
141        self.items.read().unwrap().contains_key(key)
142    }
143
144    /// Preload an item into the cache
145    pub fn preload(&self, key: &K) -> Result<()> {
146        self.get(key).map(|_| ())
147    }
148
149    /// Get cache statistics
150    pub fn cache_stats(&self) -> LazyCollectionStats {
151        let items = self.items.read().unwrap();
152        LazyCollectionStats {
153            cached_items: items.len(),
154            cache_limit: self.cache_size_limit,
155        }
156    }
157
158    /// Clear the cache
159    pub fn clear_cache(&self) {
160        self.items.write().unwrap().clear();
161    }
162}
163
/// Statistics for lazy collections
///
/// A point-in-time snapshot produced by `LazyCollection::cache_stats`.
#[derive(Debug, Clone)]
pub struct LazyCollectionStats {
    /// Number of entries held in the cache when the snapshot was taken.
    pub cached_items: usize,
    /// Configured entry limit; `None` when the collection was built without one.
    pub cache_limit: Option<usize>,
}
170
171/// Lazy file loader for large datasets
172pub struct LazyFileLoader {
173    base_path: PathBuf,
174    loaded_files: RwLock<HashMap<String, Vec<u8>>>,
175    file_metadata: RwLock<HashMap<String, FileMetadata>>,
176    max_cache_size: usize,
177    current_cache_size: RwLock<usize>,
178}
179
180#[derive(Debug, Clone)]
181struct FileMetadata {
182    size: usize,
183    last_accessed: Instant,
184    load_count: usize,
185}
186
187impl LazyFileLoader {
188    /// Create a new lazy file loader
189    pub fn new<P: AsRef<Path>>(base_path: P, max_cache_size: usize) -> Self {
190        Self {
191            base_path: base_path.as_ref().to_path_buf(),
192            loaded_files: RwLock::new(HashMap::new()),
193            file_metadata: RwLock::new(HashMap::new()),
194            max_cache_size,
195            current_cache_size: RwLock::new(0),
196        }
197    }
198
199    /// Load a file, using cache if available
200    pub fn load_file(&self, filename: &str) -> Result<Vec<u8>> {
201        // Check cache first
202        {
203            let mut metadata = self.file_metadata.write().unwrap();
204            if let Some(meta) = metadata.get_mut(filename) {
205                meta.last_accessed = Instant::now();
206                meta.load_count += 1;
207
208                let files = self.loaded_files.read().unwrap();
209                if let Some(data) = files.get(filename) {
210                    return Ok(data.clone());
211                }
212            }
213        }
214
215        // Load from disk
216        let file_path = self.base_path.join(filename);
217        let data = std::fs::read(&file_path).map_err(|e| {
218            ChessEngineError::IoError(format!("Failed to read file {}: {}", filename, e))
219        })?;
220
221        let file_size = data.len();
222
223        // Check if we need to evict files to make space
224        self.evict_if_necessary(file_size)?;
225
226        // Cache the loaded file
227        {
228            let mut files = self.loaded_files.write().unwrap();
229            let mut metadata = self.file_metadata.write().unwrap();
230            let mut cache_size = self.current_cache_size.write().unwrap();
231
232            files.insert(filename.to_string(), data.clone());
233            metadata.insert(
234                filename.to_string(),
235                FileMetadata {
236                    size: file_size,
237                    last_accessed: Instant::now(),
238                    load_count: 1,
239                },
240            );
241            *cache_size += file_size;
242        }
243
244        Ok(data)
245    }
246
247    /// Evict files from cache if necessary
248    fn evict_if_necessary(&self, needed_size: usize) -> Result<()> {
249        let current_size = *self.current_cache_size.read().unwrap();
250
251        if current_size + needed_size <= self.max_cache_size {
252            return Ok(()); // No eviction needed
253        }
254
255        // Calculate how much space we need to free
256        let space_to_free = (current_size + needed_size) - self.max_cache_size;
257
258        // Get files sorted by access time (LRU)
259        let files_to_evict = {
260            let metadata = self.file_metadata.read().unwrap();
261            let mut file_list: Vec<_> = metadata
262                .iter()
263                .map(|(name, meta)| (name.clone(), meta.last_accessed, meta.size))
264                .collect();
265
266            file_list.sort_by_key(|(_, access_time, _)| *access_time);
267
268            let mut freed_space = 0;
269            let mut to_evict = Vec::new();
270
271            for (name, _, size) in file_list {
272                to_evict.push(name);
273                freed_space += size;
274                if freed_space >= space_to_free {
275                    break;
276                }
277            }
278
279            to_evict
280        };
281
282        // Evict the selected files
283        {
284            let mut files = self.loaded_files.write().unwrap();
285            let mut metadata = self.file_metadata.write().unwrap();
286            let mut cache_size = self.current_cache_size.write().unwrap();
287
288            for filename in files_to_evict {
289                if let Some(meta) = metadata.remove(&filename) {
290                    files.remove(&filename);
291                    *cache_size = cache_size.saturating_sub(meta.size);
292                }
293            }
294        }
295
296        Ok(())
297    }
298
299    /// Get cache statistics
300    pub fn cache_stats(&self) -> LazyFileLoaderStats {
301        let files = self.loaded_files.read().unwrap();
302        let current_size = *self.current_cache_size.read().unwrap();
303        let metadata = self.file_metadata.read().unwrap();
304
305        let total_loads = metadata.values().map(|m| m.load_count).sum();
306
307        LazyFileLoaderStats {
308            cached_files: files.len(),
309            cache_size: current_size,
310            max_cache_size: self.max_cache_size,
311            total_file_loads: total_loads,
312        }
313    }
314
315    /// Clear all cached files
316    pub fn clear_cache(&self) {
317        let mut files = self.loaded_files.write().unwrap();
318        let mut metadata = self.file_metadata.write().unwrap();
319        let mut cache_size = self.current_cache_size.write().unwrap();
320
321        files.clear();
322        metadata.clear();
323        *cache_size = 0;
324    }
325}
326
/// Statistics for lazy file loader
///
/// A point-in-time snapshot produced by `LazyFileLoader::cache_stats`.
#[derive(Debug, Clone)]
pub struct LazyFileLoaderStats {
    /// Number of files whose contents are currently cached.
    pub cached_files: usize,
    /// Bytes currently accounted to the cache.
    pub cache_size: usize,
    /// Byte budget the loader was created with.
    pub max_cache_size: usize,
    /// Sum of the load counts of the files that are currently tracked;
    /// counts of evicted files are not included.
    pub total_file_loads: usize,
}
335
336/// Lazy position dataset for chess training data
337pub struct LazyPositionDataset {
338    file_loader: LazyFileLoader,
339    position_cache: LazyCollection<String, Vec<(Array1<f32>, f32)>>,
340    format_parsers:
341        HashMap<String, Box<dyn Fn(&[u8]) -> Result<Vec<(Array1<f32>, f32)>> + Send + Sync>>,
342}
343
344impl LazyPositionDataset {
345    /// Create a new lazy position dataset
346    pub fn new<P: AsRef<Path>>(base_path: P, max_cache_size: usize) -> Self {
347        let file_loader = LazyFileLoader::new(base_path, max_cache_size);
348
349        let position_cache = LazyCollection::with_cache_limit(
350            |_filename: &String| {
351                // This will be properly implemented by calling the method directly
352                Err(ChessEngineError::IoError("Not implemented".to_string()))
353            },
354            1000, // Cache up to 1000 position sets
355        );
356
357        let mut format_parsers: HashMap<
358            String,
359            Box<dyn Fn(&[u8]) -> Result<Vec<(Array1<f32>, f32)>> + Send + Sync>,
360        > = HashMap::new();
361
362        // Add JSON parser
363        format_parsers.insert(
364            "json".to_string(),
365            Box::new(|data| Self::parse_json_positions(data)),
366        );
367
368        // Add binary parser
369        format_parsers.insert(
370            "bin".to_string(),
371            Box::new(|data| Self::parse_binary_positions(data)),
372        );
373
374        Self {
375            file_loader,
376            position_cache,
377            format_parsers,
378        }
379    }
380
381    /// Load positions from a file
382    pub fn load_positions(&self, filename: &str) -> Result<Vec<(Array1<f32>, f32)>> {
383        self.position_cache.get(&filename.to_string())
384    }
385
386    /// Implementation of position loading
387    fn load_positions_impl(&self, filename: &str) -> Result<Vec<(Array1<f32>, f32)>> {
388        let data = self.file_loader.load_file(filename)?;
389
390        // Determine file format from extension
391        let extension = Path::new(filename)
392            .extension()
393            .and_then(|ext| ext.to_str())
394            .unwrap_or("");
395
396        if let Some(parser) = self.format_parsers.get(extension) {
397            parser(&data)
398        } else {
399            Err(ChessEngineError::IoError(format!(
400                "Unsupported file format: {}",
401                extension
402            )))
403        }
404    }
405
406    /// Parse JSON positions
407    fn parse_json_positions(data: &[u8]) -> Result<Vec<(Array1<f32>, f32)>> {
408        let text = std::str::from_utf8(data)
409            .map_err(|e| ChessEngineError::IoError(format!("Invalid UTF-8: {}", e)))?;
410
411        let mut positions = Vec::new();
412
413        for line in text.lines() {
414            if line.trim().is_empty() || line.trim().starts_with('#') {
415                continue;
416            }
417
418            let json_pos: serde_json::Value = serde_json::from_str(line)?;
419
420            if let (Some(vector), Some(eval)) = (json_pos.get("vector"), json_pos.get("evaluation"))
421            {
422                if let (Some(vector_array), Some(eval_number)) = (vector.as_array(), eval.as_f64())
423                {
424                    let vector_data: std::result::Result<Vec<f32>, ChessEngineError> = vector_array
425                        .iter()
426                        .map(|v| {
427                            v.as_f64().ok_or_else(|| {
428                                ChessEngineError::IoError("Invalid vector element".to_string())
429                            })
430                        })
431                        .map(|r| r.map(|v| v as f32))
432                        .collect();
433
434                    if let Ok(vector_data) = vector_data {
435                        let array = Array1::from_vec(vector_data);
436                        positions.push((array, eval_number as f32));
437                    }
438                }
439            }
440        }
441
442        Ok(positions)
443    }
444
445    /// Parse binary positions
446    fn parse_binary_positions(data: &[u8]) -> Result<Vec<(Array1<f32>, f32)>> {
447        if data.len() < 8 {
448            return Err(ChessEngineError::IoError("File too small".to_string()));
449        }
450
451        // Read header: [version: u32, count: u32]
452        let version = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
453        let count = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
454
455        if version != 1 {
456            return Err(ChessEngineError::IoError(format!(
457                "Unsupported version: {}",
458                version
459            )));
460        }
461
462        let mut positions = Vec::with_capacity(count as usize);
463        let mut offset = 8;
464
465        for _ in 0..count {
466            if offset + 4 > data.len() {
467                break;
468            }
469
470            // Read vector size
471            let vector_size = u32::from_le_bytes([
472                data[offset],
473                data[offset + 1],
474                data[offset + 2],
475                data[offset + 3],
476            ]) as usize;
477            offset += 4;
478
479            // Read vector data
480            let vector_bytes = vector_size * 4;
481            if offset + vector_bytes + 4 > data.len() {
482                break;
483            }
484
485            let vector_data = unsafe {
486                std::slice::from_raw_parts(data[offset..].as_ptr() as *const f32, vector_size)
487            };
488            let vector = Array1::from_vec(vector_data.to_vec());
489            offset += vector_bytes;
490
491            // Read evaluation
492            let evaluation = f32::from_le_bytes([
493                data[offset],
494                data[offset + 1],
495                data[offset + 2],
496                data[offset + 3],
497            ]);
498            offset += 4;
499
500            positions.push((vector, evaluation));
501        }
502
503        Ok(positions)
504    }
505
506    /// Preload a set of files
507    pub fn preload_files(&self, filenames: &[&str]) -> Result<()> {
508        for filename in filenames {
509            self.position_cache.preload(&filename.to_string())?;
510        }
511        Ok(())
512    }
513
514    /// Get dataset statistics
515    pub fn stats(&self) -> LazyDatasetStats {
516        LazyDatasetStats {
517            file_loader_stats: self.file_loader.cache_stats(),
518            position_cache_stats: self.position_cache.cache_stats(),
519        }
520    }
521}
522
/// Statistics for lazy dataset
///
/// Combines the snapshots of the two caches a `LazyPositionDataset` owns.
#[derive(Debug, Clone)]
pub struct LazyDatasetStats {
    /// Snapshot of the raw file cache, as returned by the file loader's `cache_stats`.
    pub file_loader_stats: LazyFileLoaderStats,
    /// Snapshot of the parsed-position cache, as returned by its `cache_stats`.
    pub position_cache_stats: LazyCollectionStats,
}
529
/// Global lazy resources manager.
///
/// Resources are registered by name together with an initializer and are
/// created the first time they are requested. Once created, a resource is
/// shared: every `get` for the same name returns a handle to the same value
/// until [`LazyResourceManager::clear`] is called.
pub struct LazyResourceManager {
    resources: RwLock<HashMap<String, Box<dyn std::any::Any + Send + Sync>>>,
    initializers: RwLock<
        HashMap<String, Arc<dyn Fn() -> Box<dyn std::any::Any + Send + Sync> + Send + Sync>>,
    >,
}

impl LazyResourceManager {
    /// Create a new resource manager with nothing registered.
    pub fn new() -> Self {
        Self {
            resources: RwLock::new(HashMap::new()),
            initializers: RwLock::new(HashMap::new()),
        }
    }

    /// Register a lazy resource.
    ///
    /// The initializer may return either the resource itself (`T`) or an
    /// `Arc<T>`; both can be retrieved with `get::<T>`. Registering a name
    /// again replaces its initializer but does not discard a resource that
    /// has already been created.
    pub fn register<T, F>(&self, name: &str, initializer: F)
    where
        T: Send + Sync + 'static,
        F: Fn() -> T + Send + Sync + 'static,
    {
        let erased: Arc<dyn Fn() -> Box<dyn std::any::Any + Send + Sync> + Send + Sync> =
            Arc::new(move || -> Box<dyn std::any::Any + Send + Sync> {
                Box::new(initializer())
            });
        self.initializers
            .write()
            .unwrap()
            .insert(name.to_string(), erased);
    }

    /// Get a resource, initializing it if necessary.
    ///
    /// Returns `None` when no initializer is registered under `name` or when
    /// the resource is not of type `T`.
    pub fn get<T>(&self, name: &str) -> Option<Arc<T>>
    where
        T: Send + Sync + 'static,
    {
        // Try to get existing resource
        {
            let resources = self.resources.read().unwrap();
            if let Some(shared) = resources
                .get(name)
                .and_then(|resource| resource.downcast_ref::<Arc<T>>())
            {
                return Some(Arc::clone(shared));
            }
        }

        // Clone the initializer handle out of the map so it stays alive after
        // the lock is released, even if `register` replaces it concurrently.
        let initializer = {
            let initializers = self.initializers.read().unwrap();
            Arc::clone(initializers.get(name)?)
        };

        // Run without holding any lock so an initializer may use the manager.
        let produced = initializer();

        let fresh: Arc<T> = match produced.downcast::<Arc<T>>() {
            Ok(shared) => *shared,
            // The initializer returned a bare `T`; wrap it for sharing.
            Err(other) => Arc::from(other.downcast::<T>().ok()?),
        };

        // Cache the resource
        let mut resources = self.resources.write().unwrap();

        // Another thread may have initialized the same resource meanwhile;
        // hand out that instance so all callers share one value.
        if let Some(existing) = resources
            .get(name)
            .and_then(|resource| resource.downcast_ref::<Arc<T>>())
        {
            return Some(Arc::clone(existing));
        }

        resources.insert(name.to_string(), Box::new(Arc::clone(&fresh)));
        Some(fresh)
    }

    /// Clear all created resources.
    ///
    /// Initializers stay registered, so the next `get` recreates the resource.
    pub fn clear(&self) {
        self.resources.write().unwrap().clear();
    }
}

impl Default for LazyResourceManager {
    fn default() -> Self {
        Self::new()
    }
}
605
606/// Global lazy resource manager instance
607static GLOBAL_RESOURCE_MANAGER: std::sync::OnceLock<LazyResourceManager> =
608    std::sync::OnceLock::new();
609
610/// Get the global resource manager
611pub fn global_resource_manager() -> &'static LazyResourceManager {
612    GLOBAL_RESOURCE_MANAGER.get_or_init(|| LazyResourceManager::new())
613}
614
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    // `Mutex` is not imported by the parent module, so `super::*` does not
    // provide it.
    use std::sync::Mutex;
    use tempfile::tempdir;

    /// The initializer runs once and later reads reuse the stored value.
    #[test]
    fn test_lazy_value() {
        let counter = Arc::new(Mutex::new(0));
        let counter_clone = Arc::clone(&counter);

        let lazy = Lazy::new(move || {
            let mut count = counter_clone.lock().unwrap();
            *count += 1;
            Ok(*count)
        });

        assert!(!lazy.is_initialized());

        // First access should initialize
        let value1 = lazy.get().unwrap();
        assert_eq!(*value1, 1);
        assert!(lazy.is_initialized());

        // Second access should return cached value
        let value2 = lazy.get().unwrap();
        assert_eq!(*value2, 1);

        // Counter should only be incremented once
        assert_eq!(*counter.lock().unwrap(), 1);
    }

    /// Loaded keys are cached; keys never requested are not.
    #[test]
    fn test_lazy_collection() {
        let collection = LazyCollection::new(|key: &String| Ok(format!("Value for {}", key)));

        let value1 = collection.get(&"test".to_string()).unwrap();
        assert_eq!(value1, "Value for test");

        assert!(collection.is_cached(&"test".to_string()));
        assert!(!collection.is_cached(&"other".to_string()));
    }

    /// A file read from disk is returned intact and shows up in the stats.
    #[test]
    fn test_lazy_file_loader() {
        let temp_dir = tempdir().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "Hello, World!").unwrap();

        let loader = LazyFileLoader::new(temp_dir.path(), 1024);

        let data = loader.load_file("test.txt").unwrap();
        assert_eq!(data, b"Hello, World!");

        let stats = loader.cache_stats();
        assert_eq!(stats.cached_files, 1);
        assert!(stats.cache_size > 0);
    }

    /// Two lookups of the same name share one underlying resource.
    #[test]
    fn test_resource_manager() {
        let manager = LazyResourceManager::new();

        manager.register("counter", || Arc::new(Mutex::new(42)));

        let counter1: Arc<Mutex<i32>> = manager.get("counter").unwrap();
        let counter2: Arc<Mutex<i32>> = manager.get("counter").unwrap();

        // Both should point to the same resource
        assert_eq!(*counter1.lock().unwrap(), 42);
        assert_eq!(*counter2.lock().unwrap(), 42);

        *counter1.lock().unwrap() = 100;
        assert_eq!(*counter2.lock().unwrap(), 100);
    }
}