chess_vector_engine/utils/
lazy_motifs.rs

//! Lazy loading system for strategic motifs.
//!
//! This module implements an on-demand loading system for strategic chess motifs,
//! significantly reducing memory usage and startup time by loading patterns only
//! when they are actually needed for evaluation.
6
7use crate::strategic_motifs::{StrategicMotif, MotifMatch, MotifType, GamePhase};
8use chess::Board;
9use std::collections::HashMap;
10use std::path::{Path, PathBuf};
11use std::sync::{Arc, RwLock, Mutex};
12use std::time::{Duration, Instant};
13use serde::{Deserialize, Serialize};
14
/// Tunable limits for the lazy motif loader.
#[derive(Debug, Clone)]
pub struct LazyLoadConfig {
    /// Upper bound on the number of motifs held in the in-memory cache.
    pub max_cached_motifs: usize,
    /// Time-to-live, in seconds, after which an unused cached motif is
    /// considered expired.
    pub motif_ttl_secs: u64,
    /// Upper bound on the number of segment file handles kept open.
    pub max_open_files: usize,
    /// Whether compression is enabled for motif files.
    pub use_compression: bool,
}
27
28impl Default for LazyLoadConfig {
29    fn default() -> Self {
30        Self {
31            max_cached_motifs: 1000,  // Keep 1000 most recent motifs in memory
32            motif_ttl_secs: 300,      // 5 minutes TTL
33            max_open_files: 10,       // Keep 10 files open
34            use_compression: true,
35        }
36    }
37}
38
39/// Metadata for a motif file segment
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct MotifSegmentMeta {
42    /// File path for this segment
43    pub file_path: PathBuf,
44    /// Range of motif IDs in this segment
45    pub id_range: (u64, u64),
46    /// Number of motifs in this segment
47    pub motif_count: usize,
48    /// File size in bytes
49    pub file_size: u64,
50    /// Game phase this segment focuses on
51    pub primary_phase: GamePhase,
52    /// Secondary phases this segment covers
53    pub secondary_phases: Vec<GamePhase>,
54    /// Creation timestamp for cache management
55    pub created_at: std::time::SystemTime,
56}
57
58/// Index mapping motif IDs to their file segments
59#[derive(Debug, Serialize, Deserialize)]
60pub struct MotifIndex {
61    /// Map from motif ID to segment metadata
62    pub motif_to_segment: HashMap<u64, MotifSegmentMeta>,
63    /// Map from pattern hash to motif IDs
64    pub pattern_to_motifs: HashMap<u64, Vec<u64>>,
65    /// Game phase to relevant segment files
66    pub phase_to_segments: HashMap<GamePhase, Vec<PathBuf>>,
67    /// Total number of motifs across all segments
68    pub total_motifs: usize,
69}
70
71/// Cache entry for loaded motifs with TTL
72#[derive(Debug, Clone)]
73struct CachedMotif {
74    motif: StrategicMotif,
75    last_accessed: Instant,
76    access_count: u32,
77}
78
/// Bounded collection of open segment readers, keyed by file path.
struct FileHandleCache {
    /// Open readers, one per segment path.
    handles: HashMap<PathBuf, Box<dyn std::io::Read + Send>>,
    /// Per-path timestamp used to choose which handle to evict.
    last_accessed: HashMap<PathBuf, Instant>,
    /// Limit on how many handles may be held.
    max_handles: usize,
}
85
86impl FileHandleCache {
87    fn new(max_handles: usize) -> Self {
88        Self {
89            handles: HashMap::new(),
90            last_accessed: HashMap::new(),
91            max_handles,
92        }
93    }
94
95    fn evict_old_handles(&mut self) {
96        if self.handles.len() >= self.max_handles {
97            // Remove least recently used handle
98            if let Some((oldest_path, _)) = self.last_accessed.iter()
99                .min_by_key(|(_, &time)| time)
100                .map(|(path, time)| (path.clone(), *time))
101            {
102                self.handles.remove(&oldest_path);
103                self.last_accessed.remove(&oldest_path);
104            }
105        }
106    }
107}
108
109/// Lazy-loading strategic motif database
110pub struct LazyStrategicDatabase {
111    /// Configuration for lazy loading
112    config: LazyLoadConfig,
113    /// Index mapping motifs to file segments
114    index: MotifIndex,
115    /// Cache of recently accessed motifs
116    motif_cache: Arc<RwLock<HashMap<u64, CachedMotif>>>,
117    /// File handle cache for efficient access
118    file_cache: Arc<Mutex<FileHandleCache>>,
119    /// Base directory for motif files
120    base_dir: PathBuf,
121    /// Statistics for monitoring performance
122    stats: Arc<RwLock<LazyLoadStats>>,
123}
124
/// Counters describing how the lazy loader has behaved so far.
#[derive(Debug, Default)]
pub struct LazyLoadStats {
    /// Lookups answered from the in-memory cache.
    pub cache_hits: u64,
    /// Lookups that were not found in the in-memory cache.
    pub cache_misses: u64,
    /// Number of segment file loads performed.
    pub files_loaded: u64,
    /// Number of motifs deserialized from disk.
    pub motifs_loaded: u64,
    /// Number of motifs removed from the cache by eviction.
    pub cache_evictions: u64,
    /// Number of file handles removed by eviction.
    pub file_handle_evictions: u64,
    /// Sum of all recorded load times, in milliseconds.
    pub total_load_time_ms: u64,
    /// `total_load_time_ms` divided by the number of files loaded.
    pub average_load_time_ms: f64,
}
137
138impl LazyLoadStats {
139    pub fn hit_ratio(&self) -> f64 {
140        if self.cache_hits + self.cache_misses == 0 {
141            0.0
142        } else {
143            self.cache_hits as f64 / (self.cache_hits + self.cache_misses) as f64
144        }
145    }
146
147    pub fn update_load_time(&mut self, load_time_ms: u64) {
148        self.total_load_time_ms += load_time_ms;
149        self.average_load_time_ms = self.total_load_time_ms as f64 / self.files_loaded.max(1) as f64;
150    }
151}
152
153impl LazyStrategicDatabase {
154    /// Create new lazy-loading strategic database
155    pub fn new<P: AsRef<Path>>(base_dir: P, config: LazyLoadConfig) -> Result<Self, Box<dyn std::error::Error>> {
156        let base_dir = base_dir.as_ref().to_path_buf();
157        
158        // Load the index file
159        let index_path = base_dir.join("motif_index.bin");
160        let index = Self::load_index(&index_path)?;
161        
162        let file_cache = FileHandleCache::new(config.max_open_files);
163        
164        Ok(Self {
165            config,
166            index,
167            motif_cache: Arc::new(RwLock::new(HashMap::new())),
168            file_cache: Arc::new(Mutex::new(file_cache)),
169            base_dir,
170            stats: Arc::new(RwLock::new(LazyLoadStats::default())),
171        })
172    }
173
174    /// Load motif index from file
175    fn load_index(index_path: &Path) -> Result<MotifIndex, Box<dyn std::error::Error>> {
176        use std::fs::File;
177        use std::io::BufReader;
178
179        let file = File::open(index_path)?;
180        let reader = BufReader::new(file);
181        let index: MotifIndex = bincode::deserialize_from(reader)?;
182        Ok(index)
183    }
184
185    /// Get a motif by ID, loading from disk if necessary
186    pub fn get_motif(&self, motif_id: u64) -> Result<Option<StrategicMotif>, Box<dyn std::error::Error>> {
187        // Check cache first
188        {
189            let cache = self.motif_cache.read().unwrap();
190            if let Some(cached) = cache.get(&motif_id) {
191                self.stats.write().unwrap().cache_hits += 1;
192                return Ok(Some(cached.motif.clone()));
193            }
194        }
195
196        self.stats.write().unwrap().cache_misses += 1;
197
198        // Find which segment contains this motif
199        let segment = match self.index.motif_to_segment.get(&motif_id) {
200            Some(segment) => segment,
201            None => return Ok(None),
202        };
203
204        // Load the motif from disk
205        let motif = self.load_motif_from_segment(motif_id, segment)?;
206        
207        if let Some(motif) = motif.as_ref() {
208            // Cache the loaded motif
209            self.cache_motif(motif_id, motif.clone());
210        }
211
212        Ok(motif)
213    }
214
215    /// Find motifs matching a pattern hash
216    pub fn find_motifs_by_pattern(&self, pattern_hash: u64) -> Result<Vec<StrategicMotif>, Box<dyn std::error::Error>> {
217        let motif_ids = match self.index.pattern_to_motifs.get(&pattern_hash) {
218            Some(ids) => ids,
219            None => return Ok(Vec::new()),
220        };
221
222        let mut results = Vec::new();
223        for &motif_id in motif_ids {
224            if let Some(motif) = self.get_motif(motif_id)? {
225                results.push(motif);
226            }
227        }
228
229        Ok(results)
230    }
231
232    /// Find motifs relevant to a specific game phase
233    pub fn find_motifs_by_phase(&self, phase: &GamePhase) -> Result<Vec<StrategicMotif>, Box<dyn std::error::Error>> {
234        let segment_paths = match self.index.phase_to_segments.get(phase) {
235            Some(paths) => paths,
236            None => return Ok(Vec::new()),
237        };
238
239        let mut results = Vec::new();
240        
241        // Load a sampling of motifs from relevant segments (not all at once)
242        for path in segment_paths.iter().take(3) { // Limit to 3 segments to avoid memory bloat
243            let motifs = self.load_segment_sample(path, 10)?; // Load 10 motifs per segment
244            results.extend(motifs);
245        }
246
247        Ok(results)
248    }
249
250    /// Evaluate a position against relevant strategic motifs
251    pub fn evaluate_position(&self, board: &Board) -> Result<Vec<MotifMatch>, Box<dyn std::error::Error>> {
252        let position_hash = self.calculate_position_hash(board);
253        
254        // Find motifs that might match this position pattern
255        let relevant_motifs = self.find_motifs_by_pattern(position_hash)?;
256        
257        let mut matches = Vec::new();
258        
259        for motif in relevant_motifs {
260            if let Some(motif_match) = self.match_motif_to_position(&motif, board) {
261                matches.push(motif_match);
262            }
263        }
264
265        // Sort by relevance score
266        matches.sort_by(|a, b| b.relevance.partial_cmp(&a.relevance).unwrap_or(std::cmp::Ordering::Equal));
267        
268        Ok(matches)
269    }
270
271    /// Load a specific motif from its segment file
272    fn load_motif_from_segment(&self, motif_id: u64, segment: &MotifSegmentMeta) -> Result<Option<StrategicMotif>, Box<dyn std::error::Error>> {
273        let start_time = Instant::now();
274        
275        let file_path = self.base_dir.join(&segment.file_path);
276        let motifs = self.load_segment(&file_path)?;
277        
278        let load_time = start_time.elapsed().as_millis() as u64;
279        {
280            let mut stats = self.stats.write().unwrap();
281            stats.files_loaded += 1;
282            stats.motifs_loaded += motifs.len() as u64;
283            stats.update_load_time(load_time);
284        }
285
286        // Find the specific motif
287        let motif = motifs.into_iter().find(|m| m.id == motif_id);
288        Ok(motif)
289    }
290
291    /// Load a sample of motifs from a segment (for phase-based queries)
292    fn load_segment_sample(&self, path: &Path, max_count: usize) -> Result<Vec<StrategicMotif>, Box<dyn std::error::Error>> {
293        let full_path = self.base_dir.join(path);
294        let motifs = self.load_segment(&full_path)?;
295        
296        // Take a sample to avoid loading too many motifs at once
297        Ok(motifs.into_iter().take(max_count).collect())
298    }
299
300    /// Load an entire segment file
301    fn load_segment(&self, path: &Path) -> Result<Vec<StrategicMotif>, Box<dyn std::error::Error>> {
302        use std::fs::File;
303        use std::io::BufReader;
304
305        let file = File::open(path)?;
306        let reader = BufReader::new(file);
307        
308        let motifs: Vec<StrategicMotif> = if self.config.use_compression {
309            // Handle compressed files
310            bincode::deserialize_from(reader)?
311        } else {
312            bincode::deserialize_from(reader)?
313        };
314
315        Ok(motifs)
316    }
317
318    /// Cache a motif with TTL
319    fn cache_motif(&self, motif_id: u64, motif: StrategicMotif) {
320        let mut cache = self.motif_cache.write().unwrap();
321        
322        // Evict old entries if cache is full
323        if cache.len() >= self.config.max_cached_motifs {
324            self.evict_old_motifs(&mut cache);
325        }
326
327        cache.insert(motif_id, CachedMotif {
328            motif,
329            last_accessed: Instant::now(),
330            access_count: 1,
331        });
332    }
333
334    /// Evict old motifs from cache based on TTL and LRU
335    fn evict_old_motifs(&self, cache: &mut HashMap<u64, CachedMotif>) {
336        let now = Instant::now();
337        let ttl = Duration::from_secs(self.config.motif_ttl_secs);
338        
339        // Remove expired entries
340        let expired_keys: Vec<u64> = cache.iter()
341            .filter(|(_, cached)| now.duration_since(cached.last_accessed) > ttl)
342            .map(|(&id, _)| id)
343            .collect();
344            
345        for key in expired_keys {
346            cache.remove(&key);
347            self.stats.write().unwrap().cache_evictions += 1;
348        }
349
350        // If still too many, remove least recently used
351        while cache.len() >= self.config.max_cached_motifs {
352            if let Some((lru_id, _)) = cache.iter()
353                .min_by_key(|(_, cached)| cached.last_accessed)
354                .map(|(&id, cached)| (id, cached))
355            {
356                cache.remove(&lru_id);
357                self.stats.write().unwrap().cache_evictions += 1;
358            } else {
359                break;
360            }
361        }
362    }
363
364    /// Calculate a simple position hash for pattern matching
365    fn calculate_position_hash(&self, board: &Board) -> u64 {
366        use std::collections::hash_map::DefaultHasher;
367        use std::hash::{Hash, Hasher};
368        
369        let mut hasher = DefaultHasher::new();
370        board.to_string().hash(&mut hasher);
371        hasher.finish()
372    }
373
374    /// Match a motif against a position
375    fn match_motif_to_position(&self, motif: &StrategicMotif, board: &Board) -> Option<MotifMatch> {
376        // Simplified matching logic - in a real implementation this would be more sophisticated
377        let relevance = match &motif.motif_type {
378            MotifType::PawnStructure(_) => 0.7,
379            MotifType::PieceCoordination(_) => 0.6,
380            MotifType::KingSafety(_) => 0.8,
381            MotifType::Initiative(_) => 0.5,
382            MotifType::Endgame(_) => 0.6,
383            MotifType::Opening(_) => 0.4,
384        };
385
386        if relevance > 0.3 {
387            Some(MotifMatch {
388                motif: motif.clone(),
389                relevance,
390                matching_squares: Vec::new(), // Would be populated by real pattern matching
391            })
392        } else {
393            None
394        }
395    }
396
397    /// Get cache statistics
398    pub fn get_stats(&self) -> LazyLoadStats {
399        let stats = self.stats.read().unwrap();
400        LazyLoadStats {
401            cache_hits: stats.cache_hits,
402            cache_misses: stats.cache_misses,
403            files_loaded: stats.files_loaded,
404            motifs_loaded: stats.motifs_loaded,
405            cache_evictions: stats.cache_evictions,
406            file_handle_evictions: stats.file_handle_evictions,
407            total_load_time_ms: stats.total_load_time_ms,
408            average_load_time_ms: stats.average_load_time_ms,
409        }
410    }
411
412    /// Clear all caches and reset statistics
413    pub fn clear_caches(&self) {
414        self.motif_cache.write().unwrap().clear();
415        self.file_cache.lock().unwrap().handles.clear();
416        *self.stats.write().unwrap() = LazyLoadStats::default();
417    }
418
419    /// Preload motifs for a specific game phase (optimization)
420    pub fn preload_phase(&self, phase: GamePhase) -> Result<usize, Box<dyn std::error::Error>> {
421        let motifs = self.find_motifs_by_phase(&phase)?;
422        let count = motifs.len();
423        
424        // Motifs are now cached from the find_motifs_by_phase call
425        Ok(count)
426    }
427
428    /// Get total number of available motifs
429    pub fn total_motifs(&self) -> usize {
430        self.index.total_motifs
431    }
432
433    /// Get number of cached motifs
434    pub fn cached_motifs(&self) -> usize {
435        self.motif_cache.read().unwrap().len()
436    }
437}
438
439/// Utility for creating motif segment files
440pub struct MotifSegmentBuilder {
441    config: LazyLoadConfig,
442    base_dir: PathBuf,
443}
444
445impl MotifSegmentBuilder {
446    pub fn new<P: AsRef<Path>>(base_dir: P, config: LazyLoadConfig) -> Self {
447        Self {
448            config,
449            base_dir: base_dir.as_ref().to_path_buf(),
450        }
451    }
452
453    /// Split a large collection of motifs into segment files
454    pub fn create_segments(&self, motifs: Vec<StrategicMotif>, motifs_per_segment: usize) -> Result<MotifIndex, Box<dyn std::error::Error>> {
455        let mut index = MotifIndex {
456            motif_to_segment: HashMap::new(),
457            pattern_to_motifs: HashMap::new(),
458            phase_to_segments: HashMap::new(),
459            total_motifs: motifs.len(),
460        };
461
462        // Group motifs by game phase for better locality
463        let mut phase_groups: HashMap<GamePhase, Vec<StrategicMotif>> = HashMap::new();
464        
465        for motif in motifs {
466            let phase = motif.context.game_phase.clone();
467            phase_groups.entry(phase).or_insert_with(Vec::new).push(motif);
468        }
469
470        // Create segments for each phase
471        for (phase, phase_motifs) in phase_groups {
472            let segments = self.create_phase_segments(phase_motifs, motifs_per_segment, &phase)?;
473            
474            for segment in segments {
475                index.phase_to_segments.entry(phase.clone()).or_insert_with(Vec::new).push(segment.file_path.clone());
476                
477                // Update index mappings for motifs in this segment
478                for motif_id in segment.id_range.0..=segment.id_range.1 {
479                    index.motif_to_segment.insert(motif_id, segment.clone());
480                }
481            }
482        }
483
484        // Save the index
485        self.save_index(&index)?;
486        
487        Ok(index)
488    }
489
490    fn create_phase_segments(&self, motifs: Vec<StrategicMotif>, motifs_per_segment: usize, phase: &GamePhase) -> Result<Vec<MotifSegmentMeta>, Box<dyn std::error::Error>> {
491        let mut segments = Vec::new();
492        
493        for (segment_idx, chunk) in motifs.chunks(motifs_per_segment).enumerate() {
494            let filename = format!("{:?}_segment_{}.bin", phase, segment_idx);
495            let file_path = self.base_dir.join(&filename);
496            
497            // Write the segment file
498            self.write_segment_file(&file_path, chunk)?;
499            
500            let id_range = if chunk.is_empty() {
501                (0, 0)
502            } else {
503                (chunk[0].id, chunk[chunk.len() - 1].id)
504            };
505
506            let segment = MotifSegmentMeta {
507                file_path: PathBuf::from(filename),
508                id_range,
509                motif_count: chunk.len(),
510                file_size: std::fs::metadata(&file_path)?.len(),
511                primary_phase: phase.clone(),
512                secondary_phases: Vec::new(),
513                created_at: std::time::SystemTime::now(),
514            };
515
516            segments.push(segment);
517        }
518
519        Ok(segments)
520    }
521
522    fn write_segment_file(&self, path: &Path, motifs: &[StrategicMotif]) -> Result<(), Box<dyn std::error::Error>> {
523        use std::fs::File;
524        use std::io::BufWriter;
525
526        let file = File::create(path)?;
527        let writer = BufWriter::new(file);
528        
529        bincode::serialize_into(writer, motifs)?;
530        Ok(())
531    }
532
533    fn save_index(&self, index: &MotifIndex) -> Result<(), Box<dyn std::error::Error>> {
534        use std::fs::File;
535        use std::io::BufWriter;
536
537        let index_path = self.base_dir.join("motif_index.bin");
538        let file = File::create(index_path)?;
539        let writer = BufWriter::new(file);
540        
541        bincode::serialize_into(writer, index)?;
542        Ok(())
543    }
544}
545
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Smoke test: the default configuration can be built.
    /// Exercising the loader itself would need on-disk test data.
    #[test]
    fn test_lazy_loading_basic_functionality() {
        let defaults = LazyLoadConfig::default();
        assert_eq!(defaults.max_cached_motifs, 1000);
    }

    /// Builds a tiny-cache configuration with immediate expiry.
    /// Full eviction coverage would need more setup.
    #[test]
    fn test_cache_eviction() {
        let tiny_cache = LazyLoadConfig {
            max_cached_motifs: 2,
            motif_ttl_secs: 0, // Immediate expiration
            ..Default::default()
        };

        assert_eq!(tiny_cache.max_cached_motifs, 2);
    }
}
570}