1pub mod ann;
69pub mod auto_discovery;
70pub mod gpu_acceleration;
71pub mod lichess_loader;
72pub mod lsh;
73pub mod manifold_learner;
74pub mod nnue;
75pub mod opening_book;
76pub mod persistence;
77pub mod position_encoder;
78pub mod similarity_search;
79pub mod streaming_loader;
80pub mod tactical_search;
81pub mod training;
82pub mod ultra_fast_loader;
83pub mod variational_autoencoder;
84pub mod uci;
86
87pub use auto_discovery::{AutoDiscovery, FormatPriority, TrainingFile};
88pub use gpu_acceleration::{DeviceType, GPUAccelerator};
89pub use lichess_loader::LichessLoader;
90pub use lsh::LSH;
91pub use manifold_learner::ManifoldLearner;
92pub use nnue::{BlendStrategy, EvalStats, HybridEvaluator, NNUEConfig, NNUE};
93pub use opening_book::{OpeningBook, OpeningBookStats, OpeningEntry};
94pub use persistence::{Database, LSHTableData, PositionData};
95pub use position_encoder::PositionEncoder;
96pub use similarity_search::SimilaritySearch;
97pub use streaming_loader::StreamingLoader;
98pub use tactical_search::{TacticalConfig, TacticalResult, TacticalSearch};
99pub use training::{
100 EngineEvaluator, GameExtractor, SelfPlayConfig, SelfPlayTrainer, TacticalPuzzle,
101 TacticalPuzzleParser, TacticalTrainingData, TrainingData, TrainingDataset,
102};
103pub use ultra_fast_loader::{LoadingStats, UltraFastLoader};
104pub use variational_autoencoder::{VAEConfig, VariationalAutoencoder};
105pub use uci::{run_uci_engine, run_uci_engine_with_config, UCIConfig, UCIEngine};
107
108use chess::{Board, ChessMove};
109use ndarray::{Array1, Array2};
110use serde_json::Value;
111use std::collections::HashMap;
112use std::path::Path;
113use std::str::FromStr;
114
115fn move_centrality(chess_move: &ChessMove) -> f32 {
118 let dest_square = chess_move.get_dest();
119 let rank = dest_square.get_rank().to_index() as f32;
120 let file = dest_square.get_file().to_index() as f32;
121
122 let center_rank = 3.5;
124 let center_file = 3.5;
125
126 let rank_distance = (rank - center_rank).abs();
127 let file_distance = (file - center_file).abs();
128
129 let max_distance = 3.5; let distance = (rank_distance + file_distance) / 2.0;
132 max_distance - distance
133}
134
/// A move suggested from evidence gathered across similar stored positions.
#[derive(Debug, Clone)]
pub struct MoveRecommendation {
    /// The recommended move.
    pub chess_move: ChessMove,
    /// Confidence score for this recommendation.
    pub confidence: f32,
    /// How many similar positions contributed to this recommendation.
    pub from_similar_position_count: usize,
    /// Mean outcome/evaluation observed across those positions.
    pub average_outcome: f32,
}
143
/// Snapshot of the engine's knowledge-base size and enabled features
/// (produced by `ChessVectorEngine::training_stats`).
#[derive(Debug, Clone)]
pub struct TrainingStats {
    /// Positions stored in the exact similarity index.
    pub total_positions: usize,
    /// Boards tracked in the engine's parallel board list.
    pub unique_positions: usize,
    /// Whether any per-position move data is stored.
    pub has_move_data: bool,
    /// Number of positions that carry move data.
    pub move_data_entries: usize,
    /// Whether approximate (LSH) search is enabled.
    pub lsh_enabled: bool,
    /// Whether manifold-compressed search is enabled.
    pub manifold_enabled: bool,
    /// Whether an opening book is loaded.
    pub opening_book_enabled: bool,
}
155
/// Tuning knobs for blending pattern-based and tactical evaluation.
#[derive(Debug, Clone)]
pub struct HybridConfig {
    /// When pattern confidence falls below this threshold, tactical
    /// refinement is consulted (see `evaluate_position`).
    pub pattern_confidence_threshold: f32,
    /// Master switch for tactical refinement during evaluation.
    pub enable_tactical_refinement: bool,
    /// Configuration for the tactical searcher.
    pub tactical_config: TacticalConfig,
    /// Base weight of the pattern evaluation in the hybrid blend.
    pub pattern_weight: f32,
    /// Neighbor count at which pattern confidence reaches full strength.
    pub min_similar_positions: usize,
}
170
171impl Default for HybridConfig {
172 fn default() -> Self {
173 Self {
174 pattern_confidence_threshold: 0.8,
175 enable_tactical_refinement: true,
176 tactical_config: TacticalConfig::default(),
177 pattern_weight: 0.7, min_similar_positions: 3,
179 }
180 }
181}
182
/// Core engine: encodes chess positions into feature vectors and evaluates
/// them by similarity search over a stored knowledge base, optionally
/// augmented with LSH indexing, manifold compression, an opening book,
/// persistence, and tactical search.
pub struct ChessVectorEngine {
    // Turns a `Board` into a fixed-size feature vector.
    encoder: PositionEncoder,
    // Exact (exhaustive) similarity index over all stored vectors.
    similarity_search: SimilaritySearch,
    // Approximate index for sub-linear lookups; used when `use_lsh` is set.
    lsh_index: Option<LSH>,
    // Learns a compressed embedding; gates the manifold search path.
    manifold_learner: Option<ManifoldLearner>,
    use_lsh: bool,
    use_manifold: bool,
    // Per-position move data: position index -> [(move, score)].
    position_moves: HashMap<usize, Vec<(ChessMove, f32)>>,
    // Exact similarity index over manifold-compressed vectors.
    manifold_similarity_search: Option<SimilaritySearch>,
    // LSH index over manifold-compressed vectors.
    manifold_lsh_index: Option<LSH>,
    // Parallel arrays: vector, board, and evaluation at index i describe the
    // same stored position.
    position_vectors: Vec<Array1<f32>>,
    position_boards: Vec<Board>,
    position_evaluations: Vec<f32>,
    // Consulted before similarity search in `evaluate_position`.
    opening_book: Option<OpeningBook>,
    // Optional on-disk persistence backend.
    database: Option<Database>,
    // Fallback/refinement searcher for low-confidence pattern matches.
    tactical_search: Option<TacticalSearch>,
    // Tuning for blending pattern vs tactical evaluation.
    hybrid_config: HybridConfig,
}
266
impl Clone for ChessVectorEngine {
    /// Manual `Clone`: most state is deep-cloned, but some members are
    /// deliberately reset on the copy:
    /// - `manifold_learner` is dropped and `use_manifold` forced to `false`,
    ///   so the clone falls back to full-dimensional search;
    /// - `database` is dropped, so the clone has no persistence connection.
    ///
    /// NOTE(review): `manifold_similarity_search` / `manifold_lsh_index` ARE
    /// cloned even though `use_manifold` is `false` on the clone — presumably
    /// to allow re-enabling manifold search later; confirm this is intended.
    fn clone(&self) -> Self {
        Self {
            encoder: self.encoder.clone(),
            similarity_search: self.similarity_search.clone(),
            lsh_index: self.lsh_index.clone(),
            manifold_learner: None, use_lsh: self.use_lsh,
            use_manifold: false, position_moves: self.position_moves.clone(),
            manifold_similarity_search: self.manifold_similarity_search.clone(),
            manifold_lsh_index: self.manifold_lsh_index.clone(),
            position_vectors: self.position_vectors.clone(),
            position_boards: self.position_boards.clone(),
            position_evaluations: self.position_evaluations.clone(),
            opening_book: self.opening_book.clone(),
            database: None, tactical_search: self.tactical_search.clone(),
            hybrid_config: self.hybrid_config.clone(),
        }
    }
}
290
291impl ChessVectorEngine {
292 pub fn new(vector_size: usize) -> Self {
294 let mut engine = Self {
295 encoder: PositionEncoder::new(vector_size),
296 similarity_search: SimilaritySearch::new(vector_size),
297 lsh_index: None,
298 manifold_learner: None,
299 use_lsh: false,
300 use_manifold: false,
301 position_moves: HashMap::new(),
302 manifold_similarity_search: None,
303 manifold_lsh_index: None,
304 position_vectors: Vec::new(),
305 position_boards: Vec::new(),
306 position_evaluations: Vec::new(),
307 opening_book: None,
308 database: None,
309 tactical_search: None,
310 hybrid_config: HybridConfig::default(),
312 };
313
314 engine.enable_tactical_search_default();
316 engine
317 }
318
319 pub fn new_strong(vector_size: usize) -> Self {
321 let mut engine = Self::new(vector_size);
322 engine.enable_tactical_search(crate::tactical_search::TacticalConfig::strong());
324 engine
325 }
326
327 pub fn new_lightweight(vector_size: usize) -> Self {
329 Self {
330 encoder: PositionEncoder::new(vector_size),
331 similarity_search: SimilaritySearch::new(vector_size),
332 lsh_index: None,
333 manifold_learner: None,
334 use_lsh: false,
335 use_manifold: false,
336 position_moves: HashMap::new(),
337 manifold_similarity_search: None,
338 manifold_lsh_index: None,
339 position_vectors: Vec::new(),
340 position_boards: Vec::new(),
341 position_evaluations: Vec::new(),
342 opening_book: None,
343 database: None,
344 tactical_search: None, hybrid_config: HybridConfig::default(),
346 }
347 }
348
349 pub fn new_adaptive(vector_size: usize, expected_positions: usize, use_case: &str) -> Self {
352 match use_case {
353 "training" => {
354 if expected_positions > 10000 {
355 Self::new_with_lsh(vector_size, 12, 20)
357 } else {
358 Self::new(vector_size)
359 }
360 }
361 "gameplay" => {
362 if expected_positions > 15000 {
363 Self::new_with_lsh(vector_size, 10, 18)
365 } else {
366 Self::new(vector_size)
367 }
368 }
369 "analysis" => {
370 if expected_positions > 10000 {
371 Self::new_with_lsh(vector_size, 14, 22)
373 } else {
374 Self::new(vector_size)
375 }
376 }
377 _ => Self::new(vector_size), }
379 }
380
381 pub fn new_with_lsh(vector_size: usize, num_tables: usize, hash_size: usize) -> Self {
383 Self {
384 encoder: PositionEncoder::new(vector_size),
385 similarity_search: SimilaritySearch::new(vector_size),
386 lsh_index: Some(LSH::new(vector_size, num_tables, hash_size)),
387 manifold_learner: None,
388 use_lsh: true,
389 use_manifold: false,
390 position_moves: HashMap::new(),
391 manifold_similarity_search: None,
392 manifold_lsh_index: None,
393 position_vectors: Vec::new(),
394 position_boards: Vec::new(),
395 position_evaluations: Vec::new(),
396 opening_book: None,
397 database: None,
398 tactical_search: None,
399 hybrid_config: HybridConfig::default(),
401 }
402 }
403
404 pub fn enable_lsh(&mut self, num_tables: usize, hash_size: usize) {
406 self.lsh_index = Some(LSH::new(self.encoder.vector_size(), num_tables, hash_size));
407 self.use_lsh = true;
408
409 if let Some(ref mut lsh) = self.lsh_index {
411 for (vector, evaluation) in self.similarity_search.get_all_positions() {
412 lsh.add_vector(vector, evaluation);
413 }
414 }
415 }
416
417 pub fn add_position(&mut self, board: &Board, evaluation: f32) {
419 if !self.is_position_safe(board) {
421 return; }
423
424 let vector = self.encoder.encode(board);
425 self.similarity_search
426 .add_position(vector.clone(), evaluation);
427
428 self.position_vectors.push(vector.clone());
430 self.position_boards.push(*board);
431 self.position_evaluations.push(evaluation);
432
433 if let Some(ref mut lsh) = self.lsh_index {
435 lsh.add_vector(vector.clone(), evaluation);
436 }
437
438 if self.use_manifold {
440 if let Some(ref learner) = self.manifold_learner {
441 let compressed = learner.encode(&vector);
442
443 if let Some(ref mut search) = self.manifold_similarity_search {
444 search.add_position(compressed.clone(), evaluation);
445 }
446
447 if let Some(ref mut lsh) = self.manifold_lsh_index {
448 lsh.add_vector(compressed, evaluation);
449 }
450 }
451 }
452 }
453
454 pub fn find_similar_positions(&self, board: &Board, k: usize) -> Vec<(Array1<f32>, f32, f32)> {
456 let query_vector = self.encoder.encode(board);
457
458 if self.use_manifold {
460 if let Some(ref manifold_learner) = self.manifold_learner {
461 let compressed_query = manifold_learner.encode(&query_vector);
462
463 if let Some(ref lsh) = self.manifold_lsh_index {
465 return lsh.query(&compressed_query, k);
466 }
467
468 if let Some(ref search) = self.manifold_similarity_search {
470 return search.search(&compressed_query, k);
471 }
472 }
473 }
474
475 if self.use_lsh {
477 if let Some(ref lsh_index) = self.lsh_index {
478 return lsh_index.query(&query_vector, k);
479 }
480 }
481
482 self.similarity_search.search(&query_vector, k)
484 }
485
486 pub fn find_similar_positions_with_indices(
488 &self,
489 board: &Board,
490 k: usize,
491 ) -> Vec<(usize, f32, f32)> {
492 let query_vector = self.encoder.encode(board);
493
494 let mut results = Vec::new();
497
498 for (i, stored_vector) in self.position_vectors.iter().enumerate() {
499 let similarity = self.encoder.similarity(&query_vector, stored_vector);
500 let eval = self.position_evaluations.get(i).copied().unwrap_or(0.0);
501 results.push((i, eval, similarity));
502 }
503
504 results.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
506 results.truncate(k);
507
508 results
509 }
510
511 pub fn evaluate_position(&mut self, board: &Board) -> Option<f32> {
513 if let Some(entry) = self.get_opening_entry(board) {
522 return Some(entry.evaluation);
523 }
524
525 let similar_positions = self.find_similar_positions(board, 5);
527
528 if similar_positions.is_empty() {
529 if let Some(ref mut tactical_search) = self.tactical_search {
531 let result = tactical_search.search(board);
532 return Some(result.evaluation);
533 }
534 return None;
535 }
536
537 let mut weighted_sum = 0.0;
539 let mut weight_sum = 0.0;
540 let mut similarity_scores = Vec::new();
541
542 for (_, evaluation, similarity) in &similar_positions {
543 let weight = *similarity;
544 weighted_sum += evaluation * weight;
545 weight_sum += weight;
546 similarity_scores.push(*similarity);
547 }
548
549 let pattern_evaluation = weighted_sum / weight_sum;
550
551 let avg_similarity = similarity_scores.iter().sum::<f32>() / similarity_scores.len() as f32;
553 let count_factor = (similar_positions.len() as f32
554 / self.hybrid_config.min_similar_positions as f32)
555 .min(1.0);
556 let pattern_confidence = avg_similarity * count_factor;
557
558 let use_tactical = self.hybrid_config.enable_tactical_refinement
560 && pattern_confidence < self.hybrid_config.pattern_confidence_threshold
561 && self.tactical_search.is_some();
562
563 if use_tactical {
564 if let Some(ref mut tactical_search) = self.tactical_search {
566 let tactical_result = if tactical_search.config.enable_parallel_search {
567 tactical_search.search_parallel(board)
568 } else {
569 tactical_search.search(board)
570 };
571
572 let pattern_weight = self.hybrid_config.pattern_weight * pattern_confidence;
574 let tactical_weight = 1.0 - pattern_weight;
575
576 let hybrid_evaluation = (pattern_evaluation * pattern_weight)
577 + (tactical_result.evaluation * tactical_weight);
578
579 Some(hybrid_evaluation)
580 } else {
581 Some(pattern_evaluation)
583 }
584 } else {
585 Some(pattern_evaluation)
587 }
588 }
589
    /// Encode a board into the engine's feature-vector representation.
    pub fn encode_position(&self, board: &Board) -> Array1<f32> {
        self.encoder.encode(board)
    }
594
595 pub fn calculate_similarity(&self, board1: &Board, board2: &Board) -> f32 {
597 let vec1 = self.encoder.encode(board1);
598 let vec2 = self.encoder.encode(board2);
599 self.encoder.similarity(&vec1, &vec2)
600 }
601
    /// Number of positions stored in the exact similarity-search index.
    pub fn knowledge_base_size(&self) -> usize {
        self.similarity_search.size()
    }
606
607 pub fn save_training_data<P: AsRef<std::path::Path>>(
609 &self,
610 path: P,
611 ) -> Result<(), Box<dyn std::error::Error>> {
612 use crate::training::{TrainingData, TrainingDataset};
613
614 let mut dataset = TrainingDataset::new();
615
616 for (i, board) in self.position_boards.iter().enumerate() {
618 if i < self.position_evaluations.len() {
619 dataset.data.push(TrainingData {
620 board: *board,
621 evaluation: self.position_evaluations[i],
622 depth: 15, game_id: i, });
625 }
626 }
627
628 dataset.save_incremental(path)?;
629 println!("Saved {} positions to training data", dataset.data.len());
630 Ok(())
631 }
632
633 pub fn load_training_data_incremental<P: AsRef<std::path::Path>>(
635 &mut self,
636 path: P,
637 ) -> Result<(), Box<dyn std::error::Error>> {
638 use crate::training::TrainingDataset;
639 use indicatif::{ProgressBar, ProgressStyle};
640 use std::collections::HashSet;
641
642 let existing_size = self.knowledge_base_size();
643
644 let path_ref = path.as_ref();
646 let binary_path = path_ref.with_extension("bin");
647 if binary_path.exists() {
648 println!("š Loading optimized binary format...");
649 return self.load_training_data_binary(binary_path);
650 }
651
652 println!("š Loading training data from {}...", path_ref.display());
653 let dataset = TrainingDataset::load(path)?;
654
655 let total_positions = dataset.data.len();
656 if total_positions == 0 {
657 println!("ā ļø No positions found in dataset");
658 return Ok(());
659 }
660
661 let dedup_pb = ProgressBar::new(total_positions as u64);
663 dedup_pb.set_style(
664 ProgressStyle::default_bar()
665 .template("š Checking duplicates [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({percent}%) {msg}")?
666 .progress_chars("āāā")
667 );
668
669 let mut existing_boards: HashSet<_> = self.position_boards.iter().cloned().collect();
671 let mut new_positions = Vec::new();
672 let mut new_evaluations = Vec::new();
673
674 for (i, data) in dataset.data.into_iter().enumerate() {
676 if !existing_boards.contains(&data.board) {
677 existing_boards.insert(data.board);
678 new_positions.push(data.board);
679 new_evaluations.push(data.evaluation);
680 }
681
682 if i % 1000 == 0 || i == total_positions - 1 {
683 dedup_pb.set_position((i + 1) as u64);
684 dedup_pb.set_message(format!("{} new positions found", new_positions.len()));
685 }
686 }
687 dedup_pb.finish_with_message(format!("ā
Found {} new positions", new_positions.len()));
688
689 if new_positions.is_empty() {
690 println!("ā¹ļø No new positions to add (all positions already exist)");
691 return Ok(());
692 }
693
694 let add_pb = ProgressBar::new(new_positions.len() as u64);
696 add_pb.set_style(
697 ProgressStyle::default_bar()
698 .template("ā Adding positions [{elapsed_precise}] [{bar:40.green/blue}] {pos}/{len} ({percent}%) {msg}")?
699 .progress_chars("āāā")
700 );
701
702 for (i, (board, evaluation)) in new_positions
704 .into_iter()
705 .zip(new_evaluations.into_iter())
706 .enumerate()
707 {
708 self.add_position(&board, evaluation);
709
710 if i % 500 == 0 || i == add_pb.length().unwrap() as usize - 1 {
711 add_pb.set_position((i + 1) as u64);
712 add_pb.set_message("vectors encoded".to_string());
713 }
714 }
715 add_pb.finish_with_message("ā
All positions added");
716
717 println!(
718 "šÆ Loaded {} new positions (total: {})",
719 self.knowledge_base_size() - existing_size,
720 self.knowledge_base_size()
721 );
722 Ok(())
723 }
724
725 pub fn save_training_data_binary<P: AsRef<std::path::Path>>(
727 &self,
728 path: P,
729 ) -> Result<(), Box<dyn std::error::Error>> {
730 use lz4_flex::compress_prepend_size;
731
732 println!("š¾ Saving training data in binary format (compressed)...");
733
734 #[derive(serde::Serialize)]
736 struct BinaryTrainingData {
737 positions: Vec<String>, evaluations: Vec<f32>,
739 vectors: Vec<Vec<f32>>, created_at: i64,
741 }
742
743 let current_time = std::time::SystemTime::now()
744 .duration_since(std::time::UNIX_EPOCH)?
745 .as_secs() as i64;
746
747 let mut positions = Vec::with_capacity(self.position_boards.len());
749 let mut evaluations = Vec::with_capacity(self.position_boards.len());
750 let mut vectors = Vec::with_capacity(self.position_boards.len());
751
752 for (i, board) in self.position_boards.iter().enumerate() {
753 if i < self.position_evaluations.len() {
754 positions.push(board.to_string());
755 evaluations.push(self.position_evaluations[i]);
756
757 if i < self.position_vectors.len() {
759 if let Some(vector_slice) = self.position_vectors[i].as_slice() {
760 vectors.push(vector_slice.to_vec());
761 }
762 }
763 }
764 }
765
766 let binary_data = BinaryTrainingData {
767 positions,
768 evaluations,
769 vectors,
770 created_at: current_time,
771 };
772
773 let serialized = bincode::serialize(&binary_data)?;
775
776 let compressed = compress_prepend_size(&serialized);
778
779 std::fs::write(path, &compressed)?;
781
782 println!(
783 "ā
Saved {} positions to binary file ({} bytes compressed)",
784 binary_data.positions.len(),
785 compressed.len()
786 );
787 Ok(())
788 }
789
790 pub fn load_training_data_binary<P: AsRef<std::path::Path>>(
792 &mut self,
793 path: P,
794 ) -> Result<(), Box<dyn std::error::Error>> {
795 use indicatif::{ProgressBar, ProgressStyle};
796 use lz4_flex::decompress_size_prepended;
797
798 println!("š Loading training data from binary format...");
799
800 #[derive(serde::Deserialize)]
801 struct BinaryTrainingData {
802 positions: Vec<String>,
803 evaluations: Vec<f32>,
804 #[allow(dead_code)]
805 vectors: Vec<Vec<f32>>,
806 #[allow(dead_code)]
807 created_at: i64,
808 }
809
810 let existing_size = self.knowledge_base_size();
811
812 let file_size = std::fs::metadata(&path)?.len();
814 println!(
815 "š¦ Reading {} compressed file...",
816 Self::format_bytes(file_size)
817 );
818
819 let compressed_data = std::fs::read(path)?;
820 println!("š Decompressing data...");
821 let serialized = decompress_size_prepended(&compressed_data)?;
822
823 println!("š Deserializing binary data...");
824 let binary_data: BinaryTrainingData = bincode::deserialize(&serialized)?;
825
826 let total_positions = binary_data.positions.len();
827 if total_positions == 0 {
828 println!("ā ļø No positions found in binary file");
829 return Ok(());
830 }
831
832 println!("š Processing {total_positions} positions from binary format...");
833
834 let pb = ProgressBar::new(total_positions as u64);
836 pb.set_style(
837 ProgressStyle::default_bar()
838 .template("ā” Loading positions [{elapsed_precise}] [{bar:40.green/blue}] {pos}/{len} ({percent}%) {msg}")?
839 .progress_chars("āāā")
840 );
841
842 let mut added_count = 0;
843
844 for (i, fen) in binary_data.positions.iter().enumerate() {
846 if i < binary_data.evaluations.len() {
847 if let Ok(board) = fen.parse() {
848 if !self.position_boards.contains(&board) {
850 let mut evaluation = binary_data.evaluations[i];
851
852 if evaluation.abs() > 15.0 {
856 evaluation /= 100.0;
857 }
858
859 self.add_position(&board, evaluation);
860 added_count += 1;
861 }
862 }
863 }
864
865 if i % 1000 == 0 || i == total_positions - 1 {
866 pb.set_position((i + 1) as u64);
867 pb.set_message(format!("{added_count} new positions"));
868 }
869 }
870 pb.finish_with_message(format!("ā
Loaded {added_count} new positions"));
871
872 println!(
873 "šÆ Binary loading complete: {} new positions (total: {})",
874 self.knowledge_base_size() - existing_size,
875 self.knowledge_base_size()
876 );
877 Ok(())
878 }
879
    /// Load training data from a memory-mapped file, sniffing the
    /// serialization format by attempting, in order: MessagePack, plain
    /// bincode, then LZ4-compressed bincode.
    pub fn load_training_data_mmap<P: AsRef<Path>>(
        &mut self,
        path: P,
    ) -> Result<(), Box<dyn std::error::Error>> {
        use memmap2::Mmap;
        use std::fs::File;

        let path_ref = path.as_ref();
        println!(
            "š Loading training data via memory mapping: {}",
            path_ref.display()
        );

        let file = File::open(path_ref)?;
        // SAFETY/NOTE(review): the mapping is only sound while no other
        // process truncates or rewrites the file (memmap2's documented
        // caveat) — confirm this is acceptable for the deployment.
        let mmap = unsafe { Mmap::map(&file)? };

        // Attempt 1: MessagePack.
        if let Ok(data) = rmp_serde::from_slice::<Vec<(String, f32)>>(&mmap) {
            println!("š¦ Detected MessagePack format");
            return self.load_positions_from_tuples(data);
        }

        // Attempt 2: raw bincode.
        if let Ok(data) = bincode::deserialize::<Vec<(String, f32)>>(&mmap) {
            println!("š¦ Detected bincode format");
            return self.load_positions_from_tuples(data);
        }

        // Attempt 3: LZ4+bincode; failures here propagate to the caller.
        let decompressed = lz4_flex::decompress_size_prepended(&mmap)?;
        let data: Vec<(String, f32)> = bincode::deserialize(&decompressed)?;
        println!("š¦ Detected LZ4+bincode format");
        self.load_positions_from_tuples(data)
    }
916
917 pub fn load_training_data_msgpack<P: AsRef<Path>>(
920 &mut self,
921 path: P,
922 ) -> Result<(), Box<dyn std::error::Error>> {
923 use std::fs::File;
924 use std::io::BufReader;
925
926 let path_ref = path.as_ref();
927 println!(
928 "š Loading MessagePack training data: {}",
929 path_ref.display()
930 );
931
932 let file = File::open(path_ref)?;
933 let reader = BufReader::new(file);
934 let data: Vec<(String, f32)> = rmp_serde::from_read(reader)?;
935
936 println!("š¦ MessagePack data loaded: {} positions", data.len());
937 self.load_positions_from_tuples(data)
938 }
939
    /// Load training data from newline-delimited JSON, parsing lines in
    /// parallel with rayon and deduplicating FEN→eval pairs concurrently.
    ///
    /// NOTE(review): despite the name, the whole file is read into memory
    /// (`reader.lines().collect()`) before the parallel pass — only the
    /// parsing is chunked, not the I/O.
    pub fn load_training_data_streaming_json<P: AsRef<Path>>(
        &mut self,
        path: P,
    ) -> Result<(), Box<dyn std::error::Error>> {
        use dashmap::DashMap;
        use rayon::prelude::*;
        use std::fs::File;
        use std::io::{BufRead, BufReader};
        use std::sync::Arc;

        let path_ref = path.as_ref();
        println!(
            "š Loading JSON with streaming parallel processing: {}",
            path_ref.display()
        );

        let file = File::open(path_ref)?;
        let reader = BufReader::new(file);

        // Concurrent map deduplicates FENs across rayon workers.
        let chunk_size = 10000;
        let position_map = Arc::new(DashMap::new());

        let lines: Vec<String> = reader.lines().collect::<Result<Vec<_>, _>>()?;
        let total_lines = lines.len();

        // Each chunk is parsed on a worker thread; malformed lines and lines
        // missing "fen"/"evaluation" keys are skipped silently.
        lines.par_chunks(chunk_size).for_each(|chunk| {
            for line in chunk {
                if let Ok(data) = serde_json::from_str::<serde_json::Value>(line) {
                    if let (Some(fen), Some(eval)) = (
                        data.get("fen").and_then(|v| v.as_str()),
                        data.get("evaluation").and_then(|v| v.as_f64()),
                    ) {
                        position_map.insert(fen.to_string(), eval as f32);
                    }
                }
            }
        });

        println!(
            "š¦ Parallel JSON processing complete: {} positions from {} lines",
            position_map.len(),
            total_lines
        );

        // Reclaim the map without copying when this is the last Arc
        // reference; otherwise clone the entries out.
        let data: Vec<(String, f32)> = match Arc::try_unwrap(position_map) {
            Ok(map) => map.into_iter().collect(),
            Err(arc_map) => {
                arc_map
                    .iter()
                    .map(|entry| (entry.key().clone(), *entry.value()))
                    .collect()
            }
        };
        self.load_positions_from_tuples(data)
    }
1002
    /// Load zstd-compressed training data, trying MessagePack first and then
    /// bincode as the inner serialization format.
    pub fn load_training_data_compressed<P: AsRef<Path>>(
        &mut self,
        path: P,
    ) -> Result<(), Box<dyn std::error::Error>> {
        use std::fs::File;
        use std::io::BufReader;

        let path_ref = path.as_ref();
        println!(
            "š Loading zstd compressed training data: {}",
            path_ref.display()
        );

        let file = File::open(path_ref)?;
        let reader = BufReader::new(file);
        let decoder = zstd::stream::Decoder::new(reader)?;

        // Attempt 1: zstd-wrapped MessagePack.
        if let Ok(data) = rmp_serde::from_read::<_, Vec<(String, f32)>>(decoder) {
            println!("š¦ Zstd+MessagePack data loaded: {} positions", data.len());
            return self.load_positions_from_tuples(data);
        }

        // Attempt 2: reopen and retry as zstd-wrapped bincode — the first
        // decoder was consumed by the failed MessagePack attempt.
        let file = File::open(path_ref)?;
        let reader = BufReader::new(file);
        let decoder = zstd::stream::Decoder::new(reader)?;
        let data: Vec<(String, f32)> = bincode::deserialize_from(decoder)?;

        println!("š¦ Zstd+bincode data loaded: {} positions", data.len());
        self.load_positions_from_tuples(data)
    }
1037
1038 fn load_positions_from_tuples(
1041 &mut self,
1042 data: Vec<(String, f32)>,
1043 ) -> Result<(), Box<dyn std::error::Error>> {
1044 use indicatif::{ProgressBar, ProgressStyle};
1045 use std::collections::HashSet;
1046
1047 let existing_size = self.knowledge_base_size();
1048 let mut seen_positions = HashSet::new();
1049 let mut loaded_count = 0;
1050
1051 let pb = ProgressBar::new(data.len() as u64);
1053 pb.set_style(ProgressStyle::with_template(
1054 "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({per_sec}) {msg}"
1055 )?);
1056
1057 for (fen, evaluation) in data {
1058 pb.inc(1);
1059
1060 if seen_positions.contains(&fen) {
1062 continue;
1063 }
1064 seen_positions.insert(fen.clone());
1065
1066 if let Ok(board) = Board::from_str(&fen) {
1068 self.add_position(&board, evaluation);
1069 loaded_count += 1;
1070
1071 if loaded_count % 1000 == 0 {
1072 pb.set_message(format!("Loaded {loaded_count} positions"));
1073 }
1074 }
1075 }
1076
1077 pb.finish_with_message(format!("ā
Loaded {loaded_count} new positions"));
1078
1079 println!(
1080 "šÆ Ultra-fast loading complete: {} new positions (total: {})",
1081 self.knowledge_base_size() - existing_size,
1082 self.knowledge_base_size()
1083 );
1084
1085 Ok(())
1086 }
1087
1088 fn format_bytes(bytes: u64) -> String {
1090 const UNITS: &[&str] = &["B", "KB", "MB", "GB"];
1091 let mut size = bytes as f64;
1092 let mut unit_index = 0;
1093
1094 while size >= 1024.0 && unit_index < UNITS.len() - 1 {
1095 size /= 1024.0;
1096 unit_index += 1;
1097 }
1098
1099 format!("{:.1} {}", size, UNITS[unit_index])
1100 }
1101
1102 pub fn train_from_dataset_incremental(&mut self, dataset: &crate::training::TrainingDataset) {
1104 let _existing_size = self.knowledge_base_size();
1105 let mut added = 0;
1106
1107 for data in &dataset.data {
1108 if !self.position_boards.contains(&data.board) {
1110 self.add_position(&data.board, data.evaluation);
1111 added += 1;
1112 }
1113 }
1114
1115 println!(
1116 "Added {} new positions from dataset (total: {})",
1117 added,
1118 self.knowledge_base_size()
1119 );
1120 }
1121
    /// Snapshot the engine's current knowledge-base size and feature flags.
    pub fn training_stats(&self) -> TrainingStats {
        TrainingStats {
            total_positions: self.knowledge_base_size(),
            unique_positions: self.position_boards.len(),
            has_move_data: !self.position_moves.is_empty(),
            move_data_entries: self.position_moves.len(),
            lsh_enabled: self.use_lsh,
            manifold_enabled: self.use_manifold,
            opening_book_enabled: self.opening_book.is_some(),
        }
    }
1134
    /// Scan the working directory for well-known training and tactical data
    /// files and load any that exist.
    ///
    /// Returns the list of file paths that were successfully loaded.
    pub fn auto_load_training_data(&mut self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
        use indicatif::{ProgressBar, ProgressStyle};

        // Conventional position-training file names probed in the CWD.
        let common_files = vec![
            "training_data.json",
            "tactical_training_data.json",
            "engine_training.json",
            "chess_training.json",
            "my_training.json",
        ];

        // Conventional tactical-puzzle file names.
        let tactical_files = vec![
            "tactical_puzzles.json",
            "lichess_puzzles.json",
            "my_puzzles.json",
        ];

        // Keep only the files that actually exist, tagged with loader kind.
        let mut available_files = Vec::new();
        for file_path in &common_files {
            if std::path::Path::new(file_path).exists() {
                available_files.push((file_path, "training"));
            }
        }
        for file_path in &tactical_files {
            if std::path::Path::new(file_path).exists() {
                available_files.push((file_path, "tactical"));
            }
        }

        if available_files.is_empty() {
            return Ok(Vec::new());
        }

        println!(
            "š Found {} training files to auto-load",
            available_files.len()
        );

        let pb = ProgressBar::new(available_files.len() as u64);
        pb.set_style(
            ProgressStyle::default_bar()
                .template("š Auto-loading files [{elapsed_precise}] [{bar:40.blue/cyan}] {pos}/{len} {msg}")?
                .progress_chars("āāā")
        );

        let mut loaded_files = Vec::new();

        for (i, (file_path, file_type)) in available_files.iter().enumerate() {
            pb.set_position(i as u64);
            pb.set_message("Processing...".to_string());

            // Dispatch on the loader kind recorded above.
            let result = match *file_type {
                "training" => self.load_training_data_incremental(file_path).map(|_| {
                    loaded_files.push(file_path.to_string());
                    println!("Loading complete");
                }),
                "tactical" => crate::training::TacticalPuzzleParser::load_tactical_puzzles(
                    file_path,
                )
                .map(|puzzles| {
                    crate::training::TacticalPuzzleParser::load_into_engine_incremental(
                        &puzzles, self,
                    );
                    loaded_files.push(file_path.to_string());
                    println!("Loading complete");
                }),
                _ => Ok(()),
            };

            // NOTE(review): a failed load is swallowed and still prints
            // "Loading complete" — the error is never surfaced; confirm this
            // message is intentional.
            if let Err(_e) = result {
                println!("Loading complete");
            }
        }

        pb.set_position(available_files.len() as u64);
        pb.finish_with_message(format!("ā
Auto-loaded {} files", loaded_files.len()));

        Ok(loaded_files)
    }
1217
1218 pub fn load_lichess_puzzles<P: AsRef<std::path::Path>>(
1220 &mut self,
1221 csv_path: P,
1222 ) -> Result<(), Box<dyn std::error::Error>> {
1223 println!("š„ Loading Lichess puzzles with enhanced performance...");
1224 let puzzle_entries =
1225 crate::lichess_loader::load_lichess_puzzles_basic_with_moves(csv_path, 100000)?;
1226
1227 for (board, evaluation, best_move) in puzzle_entries {
1228 self.add_position_with_move(&board, evaluation, Some(best_move), Some(evaluation));
1229 }
1230
1231 println!("ā
Lichess puzzle loading complete!");
1232 Ok(())
1233 }
1234
1235 pub fn load_lichess_puzzles_with_limit<P: AsRef<std::path::Path>>(
1237 &mut self,
1238 csv_path: P,
1239 max_puzzles: Option<usize>,
1240 ) -> Result<(), Box<dyn std::error::Error>> {
1241 match max_puzzles {
1242 Some(limit) => {
1243 println!("š Loading Lichess puzzles (limited to {limit} puzzles)...");
1244 let puzzle_entries =
1245 crate::lichess_loader::load_lichess_puzzles_basic_with_moves(csv_path, limit)?;
1246
1247 for (board, evaluation, best_move) in puzzle_entries {
1248 self.add_position_with_move(
1249 &board,
1250 evaluation,
1251 Some(best_move),
1252 Some(evaluation),
1253 );
1254 }
1255 }
1256 None => {
1257 self.load_lichess_puzzles(csv_path)?;
1259 return Ok(());
1260 }
1261 }
1262
1263 println!("ā
Lichess puzzle loading complete!");
1264 Ok(())
1265 }
1266
1267 pub fn new_with_auto_load(vector_size: usize) -> Result<Self, Box<dyn std::error::Error>> {
1269 let mut engine = Self::new(vector_size);
1270 engine.enable_opening_book();
1271
1272 let loaded_files = engine.auto_load_training_data()?;
1274
1275 if loaded_files.is_empty() {
1276 println!("š¤ Created fresh engine (no training data found)");
1277 } else {
1278 println!(
1279 "š Created engine with auto-loaded training data from {} files",
1280 loaded_files.len()
1281 );
1282 let _stats = engine.training_stats();
1283 println!("Loading complete");
1284 println!("Loading complete");
1285 }
1286
1287 Ok(engine)
1288 }
1289
    /// Build an engine optimized for startup speed: enable the opening book
    /// and persistence, load known binary training files if present (falling
    /// back to JSON auto-loading), then try to load pre-trained manifold
    /// models.
    pub fn new_with_fast_load(vector_size: usize) -> Result<Self, Box<dyn std::error::Error>> {
        use indicatif::{ProgressBar, ProgressStyle};

        let mut engine = Self::new(vector_size);
        engine.enable_opening_book();

        // NOTE(review): persistence failure is swallowed with a generic
        // "Loading complete" message — confirm this is intentional.
        if let Err(_e) = engine.enable_persistence("chess_vector_engine.db") {
            println!("Loading complete");
        }

        // Well-known binary training files, checked in this order.
        let binary_files = [
            "training_data_a100.bin", "training_data.bin",
            "tactical_training_data.bin",
            "engine_training.bin",
            "chess_training.bin",
        ];

        let existing_binary_files: Vec<_> = binary_files
            .iter()
            .filter(|&file_path| std::path::Path::new(file_path).exists())
            .collect();

        let mut loaded_count = 0;

        if !existing_binary_files.is_empty() {
            println!(
                "ā” Fast loading: Found {} binary files",
                existing_binary_files.len()
            );

            let pb = ProgressBar::new(existing_binary_files.len() as u64);
            pb.set_style(
                ProgressStyle::default_bar()
                    .template("š Fast loading [{elapsed_precise}] [{bar:40.green/cyan}] {pos}/{len} {msg}")?
                    .progress_chars("āāā")
            );

            for (i, file_path) in existing_binary_files.iter().enumerate() {
                pb.set_position(i as u64);
                pb.set_message("Processing...".to_string());

                // Failed loads are skipped silently; loading continues.
                if engine.load_training_data_binary(file_path).is_ok() {
                    loaded_count += 1;
                }
            }

            pb.set_position(existing_binary_files.len() as u64);
            pb.finish_with_message(format!("ā
Loaded {loaded_count} binary files"));
        } else {
            println!("š¦ No binary files found, falling back to JSON auto-loading...");
            let _ = engine.auto_load_training_data()?;
        }

        // Manifold models are optional; absence is reported but not fatal.
        if let Err(e) = engine.load_manifold_models() {
            println!("ā ļø No pre-trained manifold models found ({e})");
            println!(" Use --rebuild-models flag to train new models");
        }

        let stats = engine.training_stats();
        println!(
            "ā” Fast engine ready with {} positions ({} binary files loaded)",
            stats.total_positions, loaded_count
        );

        Ok(engine)
    }
1364
    /// Create an engine that scans the working directory for training
    /// files, consolidates duplicate datasets by base name, and loads the
    /// best-format file for each.
    ///
    /// # Errors
    /// Propagates discovery, loading, and cleanup-listing failures.
    pub fn new_with_auto_discovery(vector_size: usize) -> Result<Self, Box<dyn std::error::Error>> {
        println!("š Initializing engine with AUTO-DISCOVERY and format consolidation...");
        let mut engine = Self::new(vector_size);
        engine.enable_opening_book();

        // Persistence is best-effort.
        // NOTE(review): "Loading complete" on a failure path looks like a
        // placeholder message — confirm the intended text.
        if let Err(_e) = engine.enable_persistence("chess_vector_engine.db") {
            println!("Loading complete");
        }

        let discovered_files = AutoDiscovery::discover_training_files(".", true)?;

        if discovered_files.is_empty() {
            println!("ā¹ļø No training data found. Use convert methods to create optimized files.");
            return Ok(engine);
        }

        // Keep only the best-format file per dataset base name.
        let consolidated = AutoDiscovery::consolidate_by_base_name(discovered_files.clone());

        let mut total_loaded = 0;
        for (base_name, best_file) in &consolidated {
            println!("š Loading {} ({})", base_name, best_file.format);

            // Loaded count is inferred from the knowledge-base size delta.
            let initial_size = engine.knowledge_base_size();
            engine.load_file_by_format(&best_file.path, &best_file.format)?;
            let loaded_count = engine.knowledge_base_size() - initial_size;
            total_loaded += loaded_count;

            println!(" ā
Loaded {loaded_count} positions");
        }

        // Dry-run cleanup: list superseded old-format files only.
        let cleanup_candidates = AutoDiscovery::get_cleanup_candidates(&discovered_files);
        if !cleanup_candidates.is_empty() {
            println!(
                "š§¹ Found {} old format files that can be cleaned up:",
                cleanup_candidates.len()
            );
            AutoDiscovery::cleanup_old_formats(&cleanup_candidates, true)?; println!(" š” To actually remove old files, run: cargo run --bin cleanup_formats");
        }

        // Pre-trained manifold models are optional.
        if let Err(e) = engine.load_manifold_models() {
            println!("ā ļø No pre-trained manifold models found ({e})");
        }

        println!(
            "šÆ Engine ready: {} positions loaded from {} datasets",
            total_loaded,
            consolidated.len()
        );
        Ok(engine)
    }
1424
    /// Create an engine optimized for startup latency: load only the single
    /// first-listed training file, or the starter dataset when none exists.
    ///
    /// # Errors
    /// Propagates discovery and file-loading failures.
    pub fn new_with_instant_load(vector_size: usize) -> Result<Self, Box<dyn std::error::Error>> {
        println!("š Initializing engine with INSTANT loading...");
        let mut engine = Self::new(vector_size);
        engine.enable_opening_book();

        // Persistence is best-effort.
        // NOTE(review): "Loading complete" on a failure path looks like a
        // placeholder message — confirm the intended text.
        if let Err(_e) = engine.enable_persistence("chess_vector_engine.db") {
            println!("Loading complete");
        }

        let discovered_files = AutoDiscovery::discover_training_files(".", false)?;

        if discovered_files.is_empty() {
            println!("ā¹ļø No user training data found, loading starter dataset...");
            // The bundled starter dataset is itself optional.
            if let Err(_e) = engine.load_starter_dataset() {
                println!("Loading complete");
                println!("ā¹ļø Starting with empty engine");
            } else {
                println!(
                    "ā
Loaded starter dataset with {} positions",
                    engine.knowledge_base_size()
                );
            }
            return Ok(engine);
        }

        // NOTE(review): assumes discover_training_files returns results in
        // priority order, so `first()` is the best file — confirm.
        if let Some(best_file) = discovered_files.first() {
            println!(
                "ā” Loading {} format: {}",
                best_file.format,
                best_file.path.display()
            );
            engine.load_file_by_format(&best_file.path, &best_file.format)?;
            println!(
                "ā
Loaded {} positions from {} format",
                engine.knowledge_base_size(),
                best_file.format
            );
        }

        // Pre-trained manifold models are optional.
        if let Err(e) = engine.load_manifold_models() {
            println!("ā ļø No pre-trained manifold models found ({e})");
        }

        println!(
            "šÆ Engine ready: {} positions loaded",
            engine.knowledge_base_size()
        );
        Ok(engine)
    }
1481
1482 fn is_position_safe(&self, board: &Board) -> bool {
1487 match std::panic::catch_unwind(|| {
1489 use chess::MoveGen;
1490 let _legal_moves: Vec<ChessMove> = MoveGen::new_legal(board).collect();
1491 true
1492 }) {
1493 Ok(_) => true,
1494 Err(_) => {
1495 false
1497 }
1498 }
1499 }
1500
1501 pub fn check_gpu_acceleration(&self) -> Result<(), Box<dyn std::error::Error>> {
1503 match crate::gpu_acceleration::GPUAccelerator::new() {
1505 Ok(_) => {
1506 println!("š„ GPU acceleration available and ready");
1507 Ok(())
1508 }
1509 Err(_e) => Err("Processing...".to_string().into()),
1510 }
1511 }
1512
1513 pub fn load_starter_dataset(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1515 let starter_data = if let Ok(file_content) =
1517 std::fs::read_to_string("training_data/starter_dataset.json")
1518 {
1519 file_content
1520 } else {
1521 r#"[
1523 {
1524 "fen": "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1",
1525 "evaluation": 0.0,
1526 "best_move": null,
1527 "depth": 0
1528 },
1529 {
1530 "fen": "rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq e3 0 1",
1531 "evaluation": 0.1,
1532 "best_move": "e7e5",
1533 "depth": 2
1534 },
1535 {
1536 "fen": "rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq e6 0 2",
1537 "evaluation": 0.0,
1538 "best_move": "g1f3",
1539 "depth": 2
1540 }
1541 ]"#
1542 .to_string()
1543 };
1544
1545 let training_data: Vec<serde_json::Value> = serde_json::from_str(&starter_data)?;
1546
1547 for entry in training_data {
1548 if let (Some(fen), Some(evaluation)) = (entry.get("fen"), entry.get("evaluation")) {
1549 if let (Some(fen_str), Some(eval_f64)) = (fen.as_str(), evaluation.as_f64()) {
1550 match chess::Board::from_str(fen_str) {
1551 Ok(board) => {
1552 let mut eval = eval_f64 as f32;
1554
1555 if eval.abs() > 15.0 {
1558 eval /= 100.0;
1559 }
1560
1561 self.add_position(&board, eval);
1562 }
1563 Err(_) => {
1564 continue;
1566 }
1567 }
1568 }
1569 }
1570 }
1571
1572 Ok(())
1573 }
1574
1575 fn load_file_by_format(
1577 &mut self,
1578 path: &std::path::Path,
1579 format: &str,
1580 ) -> Result<(), Box<dyn std::error::Error>> {
1581 let file_size = std::fs::metadata(path)?.len();
1583
1584 if file_size > 10_000_000 {
1586 println!(
1587 "š Large file detected ({:.1} MB) - using ultra-fast loader",
1588 file_size as f64 / 1_000_000.0
1589 );
1590 return self.ultra_fast_load_any_format(path);
1591 }
1592
1593 match format {
1595 "MMAP" => self.load_training_data_mmap(path),
1596 "MSGPACK" => self.load_training_data_msgpack(path),
1597 "BINARY" => self.load_training_data_streaming_binary(path),
1598 "ZSTD" => self.load_training_data_compressed(path),
1599 "JSON" => self.load_training_data_streaming_json_v2(path),
1600 _ => Err("Processing...".to_string().into()),
1601 }
1602 }
1603
1604 pub fn ultra_fast_load_any_format<P: AsRef<std::path::Path>>(
1606 &mut self,
1607 path: P,
1608 ) -> Result<(), Box<dyn std::error::Error>> {
1609 let mut loader = UltraFastLoader::new_for_massive_datasets();
1610 loader.ultra_load_binary(path, self)?;
1611
1612 let stats = loader.get_stats();
1613 println!("š Ultra-fast loading complete:");
1614 println!(" ā
Loaded: {} positions", stats.loaded);
1615 println!("Loading complete");
1616 println!("Loading complete");
1617 println!(" š Success rate: {:.1}%", stats.success_rate() * 100.0);
1618
1619 Ok(())
1620 }
1621
1622 pub fn load_training_data_streaming_binary<P: AsRef<std::path::Path>>(
1625 &mut self,
1626 path: P,
1627 ) -> Result<(), Box<dyn std::error::Error>> {
1628 let mut loader = StreamingLoader::new();
1629 loader.stream_load_binary(path, self)?;
1630
1631 println!("š Streaming binary load complete:");
1632 println!(" Loaded: {} new positions", loader.loaded_count);
1633 println!("Loading complete");
1634 println!("Loading complete");
1635
1636 Ok(())
1637 }
1638
1639 pub fn load_training_data_streaming_json_v2<P: AsRef<std::path::Path>>(
1642 &mut self,
1643 path: P,
1644 ) -> Result<(), Box<dyn std::error::Error>> {
1645 let mut loader = StreamingLoader::new();
1646
1647 let batch_size = if std::fs::metadata(path.as_ref())?.len() > 100_000_000 {
1649 20000 } else {
1652 5000 };
1654
1655 loader.stream_load_json(path, self, batch_size)?;
1656
1657 println!("š Streaming JSON load complete:");
1658 println!(" Loaded: {} new positions", loader.loaded_count);
1659 println!("Loading complete");
1660 println!("Loading complete");
1661
1662 Ok(())
1663 }
1664
1665 pub fn new_for_massive_datasets(
1668 vector_size: usize,
1669 ) -> Result<Self, Box<dyn std::error::Error>> {
1670 println!("š Initializing engine for MASSIVE datasets (100k-1M+ positions)...");
1671 let mut engine = Self::new(vector_size);
1672 engine.enable_opening_book();
1673
1674 let discovered_files = AutoDiscovery::discover_training_files(".", false)?;
1676
1677 if discovered_files.is_empty() {
1678 println!("ā¹ļø No training data found");
1679 return Ok(engine);
1680 }
1681
1682 let largest_file = discovered_files
1684 .iter()
1685 .max_by_key(|f| f.size_bytes)
1686 .unwrap();
1687
1688 println!(
1689 "šÆ Loading largest dataset: {} ({} bytes)",
1690 largest_file.path.display(),
1691 largest_file.size_bytes
1692 );
1693
1694 engine.ultra_fast_load_any_format(&largest_file.path)?;
1696
1697 println!(
1698 "šÆ Engine ready: {} positions loaded",
1699 engine.knowledge_base_size()
1700 );
1701 Ok(engine)
1702 }
1703
    /// Convert known JSON training files into MessagePack siblings
    /// (`*.msgpack`), normalizing entries to `(fen, evaluation)` pairs.
    ///
    /// Accepts two JSON shapes: an array of `[fen, eval, ...]` tuples or an
    /// array of `{"fen": ..., "evaluation": ...}` objects. Missing input
    /// files are skipped.
    ///
    /// # Errors
    /// Returns an error on I/O failures, unparseable JSON, or an array
    /// whose first element is neither a tuple nor an object.
    pub fn convert_to_msgpack() -> Result<(), Box<dyn std::error::Error>> {
        use serde_json::Value;
        use std::fs::File;
        use std::io::{BufReader, BufWriter};

        // Make sure the A100 binary dump has a JSON sibling to convert.
        if std::path::Path::new("training_data_a100.bin").exists() {
            Self::convert_a100_binary_to_json()?;
        }

        let input_files = [
            "training_data.json",
            "tactical_training_data.json",
            "training_data_a100.json",
        ];

        for input_file in &input_files {
            let input_path = std::path::Path::new(input_file);
            if !input_path.exists() {
                continue;
            }

            let output_file_path = input_file.replace(".json", ".msgpack");
            println!("š Converting {input_file} ā {output_file_path} (MessagePack format)");

            let file = File::open(input_path)?;
            let reader = BufReader::new(file);
            let json_value: Value = serde_json::from_reader(reader)?;

            // Normalize either supported JSON shape to (fen, eval) pairs;
            // the first element decides which shape the whole array uses.
            let data: Vec<(String, f32)> = match json_value {
                Value::Array(arr) if !arr.is_empty() => {
                    if let Some(first) = arr.first() {
                        if first.is_array() {
                            arr.into_iter()
                                .filter_map(|item| {
                                    if let Value::Array(tuple) = item {
                                        if tuple.len() >= 2 {
                                            let fen = tuple[0].as_str()?.to_string();
                                            let mut eval = tuple[1].as_f64()? as f32;

                                            // Centipawn-looking values are
                                            // rescaled to pawns.
                                            if eval.abs() > 15.0 {
                                                eval /= 100.0;
                                            }

                                            Some((fen, eval))
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                })
                                .collect()
                        } else if first.is_object() {
                            arr.into_iter()
                                .filter_map(|item| {
                                    if let Value::Object(obj) = item {
                                        let fen = obj.get("fen")?.as_str()?.to_string();
                                        let mut eval = obj.get("evaluation")?.as_f64()? as f32;

                                        // Centipawn-looking values are
                                        // rescaled to pawns.
                                        if eval.abs() > 15.0 {
                                            eval /= 100.0;
                                        }

                                        Some((fen, eval))
                                    } else {
                                        None
                                    }
                                })
                                .collect()
                        } else {
                            // NOTE(review): "Processing..." looks like a
                            // placeholder error message — confirm intent.
                            return Err("Processing...".to_string().into());
                        }
                    } else {
                        Vec::new()
                    }
                }
                _ => return Err("Processing...".to_string().into()),
            };

            if data.is_empty() {
                // NOTE(review): placeholder-looking message for an
                // empty-dataset skip — confirm intent.
                println!("Loading complete");
                continue;
            }

            let output_file = File::create(&output_file_path)?;
            let mut writer = BufWriter::new(output_file);
            rmp_serde::encode::write(&mut writer, &data)?;

            let input_size = input_path.metadata()?.len();
            let output_size = std::path::Path::new(&output_file_path).metadata()?.len();
            let ratio = input_size as f64 / output_size as f64;

            println!(
                "ā
Converted: {} ā {} ({:.1}x size reduction, {} positions)",
                Self::format_bytes(input_size),
                Self::format_bytes(output_size),
                ratio,
                data.len()
            );
        }

        Ok(())
    }
1821
1822 pub fn convert_a100_binary_to_json() -> Result<(), Box<dyn std::error::Error>> {
1824 use std::fs::File;
1825 use std::io::BufWriter;
1826
1827 let binary_path = "training_data_a100.bin";
1828 let json_path = "training_data_a100.json";
1829
1830 if !std::path::Path::new(binary_path).exists() {
1831 println!("Loading complete");
1832 return Ok(());
1833 }
1834
1835 println!("š Converting A100 binary data {binary_path} ā {json_path} (JSON format)");
1836
1837 let mut engine = ChessVectorEngine::new(1024);
1839 engine.load_training_data_binary(binary_path)?;
1840
1841 let mut data = Vec::new();
1843 for (i, board) in engine.position_boards.iter().enumerate() {
1844 if i < engine.position_evaluations.len() {
1845 data.push(serde_json::json!({
1846 "fen": board.to_string(),
1847 "evaluation": engine.position_evaluations[i],
1848 "depth": 15,
1849 "game_id": i
1850 }));
1851 }
1852 }
1853
1854 let file = File::create(json_path)?;
1856 let writer = BufWriter::new(file);
1857 serde_json::to_writer(writer, &data)?;
1858
1859 println!(
1860 "ā
Converted A100 data: {} positions ā {}",
1861 data.len(),
1862 json_path
1863 );
1864 Ok(())
1865 }
1866
    /// Compress known training files (JSON and binary) with Zstd level 9,
    /// producing `.zst` siblings and printing the compression ratio.
    ///
    /// Missing inputs are skipped; the A100 binary is first converted to
    /// JSON so both representations get compressed.
    ///
    /// # Errors
    /// Propagates I/O and compression failures.
    pub fn convert_to_zstd() -> Result<(), Box<dyn std::error::Error>> {
        use std::fs::File;
        use std::io::{BufReader, BufWriter};

        if std::path::Path::new("training_data_a100.bin").exists() {
            Self::convert_a100_binary_to_json()?;
        }

        // (input, output) pairs; missing inputs are skipped below.
        let input_files = [
            ("training_data.json", "training_data.zst"),
            ("tactical_training_data.json", "tactical_training_data.zst"),
            ("training_data_a100.json", "training_data_a100.zst"),
            ("training_data.bin", "training_data.bin.zst"),
            (
                "tactical_training_data.bin",
                "tactical_training_data.bin.zst",
            ),
            ("training_data_a100.bin", "training_data_a100.bin.zst"),
        ];

        for (input_file, output_file) in &input_files {
            let input_path = std::path::Path::new(input_file);
            if !input_path.exists() {
                continue;
            }

            println!("š Converting {input_file} ā {output_file} (Zstd compression)");

            // Stream-copy through the encoder; level 9 trades speed for size.
            let input_file = File::open(input_path)?;
            let output_file_handle = File::create(output_file)?;
            let writer = BufWriter::new(output_file_handle);
            let mut encoder = zstd::stream::Encoder::new(writer, 9)?; std::io::copy(&mut BufReader::new(input_file), &mut encoder)?;
            encoder.finish()?;

            let input_size = input_path.metadata()?.len();
            let output_size = std::path::Path::new(output_file).metadata()?.len();
            let ratio = input_size as f64 / output_size as f64;

            println!(
                "ā
Compressed: {} ā {} ({:.1}x size reduction)",
                Self::format_bytes(input_size),
                Self::format_bytes(output_size),
                ratio
            );
        }

        Ok(())
    }
1920
    /// Convert known JSON/MessagePack training files into `.mmap` files
    /// (MessagePack-encoded `(fen, eval)` pairs intended for memory-mapped
    /// reading).
    ///
    /// Missing inputs are skipped; the A100 binary is first converted to
    /// JSON so it can participate.
    ///
    /// # Errors
    /// Returns an error on I/O failures, unparseable input, or an
    /// unsupported input extension.
    pub fn convert_to_mmap() -> Result<(), Box<dyn std::error::Error>> {
        use std::fs::File;
        use std::io::{BufReader, BufWriter};

        if std::path::Path::new("training_data_a100.bin").exists() {
            Self::convert_a100_binary_to_json()?;
        }

        // (input, output) pairs. NOTE(review): the JSON and MessagePack
        // variants of the same dataset map to the same output path, so a
        // later entry overwrites an earlier one — confirm this is intended.
        let input_files = [
            ("training_data.json", "training_data.mmap"),
            ("tactical_training_data.json", "tactical_training_data.mmap"),
            ("training_data_a100.json", "training_data_a100.mmap"),
            ("training_data.msgpack", "training_data.mmap"),
            (
                "tactical_training_data.msgpack",
                "tactical_training_data.mmap",
            ),
            ("training_data_a100.msgpack", "training_data_a100.mmap"),
        ];

        for (input_file, output_file) in &input_files {
            let input_path = std::path::Path::new(input_file);
            if !input_path.exists() {
                continue;
            }

            println!("š Converting {input_file} ā {output_file} (Memory-mapped format)");

            // Normalize the input into (fen, eval) pairs, branching on the
            // file extension.
            let data: Vec<(String, f32)> = if input_file.ends_with(".json") {
                let file = File::open(input_path)?;
                let reader = BufReader::new(file);
                let json_value: Value = serde_json::from_reader(reader)?;

                // Same dual-shape JSON handling as convert_to_msgpack: the
                // first element decides tuple vs. object form.
                match json_value {
                    Value::Array(arr) if !arr.is_empty() => {
                        if let Some(first) = arr.first() {
                            if first.is_array() {
                                arr.into_iter()
                                    .filter_map(|item| {
                                        if let Value::Array(tuple) = item {
                                            if tuple.len() >= 2 {
                                                let fen = tuple[0].as_str()?.to_string();
                                                let mut eval = tuple[1].as_f64()? as f32;

                                                // Centipawn-looking values
                                                // are rescaled to pawns.
                                                if eval.abs() > 15.0 {
                                                    eval /= 100.0;
                                                }

                                                Some((fen, eval))
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    })
                                    .collect()
                            } else if first.is_object() {
                                arr.into_iter()
                                    .filter_map(|item| {
                                        if let Value::Object(obj) = item {
                                            let fen = obj.get("fen")?.as_str()?.to_string();
                                            let mut eval = obj.get("evaluation")?.as_f64()? as f32;

                                            // Centipawn-looking values are
                                            // rescaled to pawns.
                                            if eval.abs() > 15.0 {
                                                eval /= 100.0;
                                            }

                                            Some((fen, eval))
                                        } else {
                                            None
                                        }
                                    })
                                    .collect()
                            } else {
                                return Err("Failed to process training data".into());
                            }
                        } else {
                            Vec::new()
                        }
                    }
                    // NOTE(review): "Processing..." looks like a placeholder
                    // error message — confirm intent.
                    _ => return Err("Processing...".to_string().into()),
                }
            } else if input_file.ends_with(".msgpack") {
                let file = File::open(input_path)?;
                let reader = BufReader::new(file);
                rmp_serde::from_read(reader)?
            } else {
                return Err("Unsupported input format for memory mapping".into());
            };

            let output_file_handle = File::create(output_file)?;
            let mut writer = BufWriter::new(output_file_handle);
            rmp_serde::encode::write(&mut writer, &data)?;

            let input_size = input_path.metadata()?.len();
            let output_size = std::path::Path::new(output_file).metadata()?.len();

            println!(
                "ā
Memory-mapped file created: {} ā {} ({} positions)",
                Self::format_bytes(input_size),
                Self::format_bytes(output_size),
                data.len()
            );
        }

        Ok(())
    }
2043
2044 pub fn convert_json_to_binary() -> Result<Vec<String>, Box<dyn std::error::Error>> {
2046 use indicatif::{ProgressBar, ProgressStyle};
2047
2048 let json_files = [
2049 "training_data.json",
2050 "tactical_training_data.json",
2051 "engine_training.json",
2052 "chess_training.json",
2053 ];
2054
2055 let existing_json_files: Vec<_> = json_files
2057 .iter()
2058 .filter(|&file_path| std::path::Path::new(file_path).exists())
2059 .collect();
2060
2061 if existing_json_files.is_empty() {
2062 println!("ā¹ļø No JSON training files found to convert");
2063 return Ok(Vec::new());
2064 }
2065
2066 println!(
2067 "š Converting {} JSON files to binary format...",
2068 existing_json_files.len()
2069 );
2070
2071 let pb = ProgressBar::new(existing_json_files.len() as u64);
2073 pb.set_style(
2074 ProgressStyle::default_bar()
2075 .template(
2076 "š¦ Converting [{elapsed_precise}] [{bar:40.yellow/blue}] {pos}/{len} {msg}",
2077 )?
2078 .progress_chars("āāā"),
2079 );
2080
2081 let mut converted_files = Vec::new();
2082
2083 for (i, json_file) in existing_json_files.iter().enumerate() {
2084 pb.set_position(i as u64);
2085 pb.set_message("Processing...".to_string());
2086
2087 let binary_file = std::path::Path::new(json_file).with_extension("bin");
2088
2089 let mut temp_engine = Self::new(1024);
2091 if temp_engine
2092 .load_training_data_incremental(json_file)
2093 .is_ok()
2094 {
2095 if temp_engine.save_training_data_binary(&binary_file).is_ok() {
2096 converted_files.push(binary_file.to_string_lossy().to_string());
2097 println!("ā
Converted {json_file} to binary format");
2098 } else {
2099 println!("Loading complete");
2100 }
2101 } else {
2102 println!("Loading complete");
2103 }
2104 }
2105
2106 pb.set_position(existing_json_files.len() as u64);
2107 pb.finish_with_message(format!("ā
Converted {} files", converted_files.len()));
2108
2109 if !converted_files.is_empty() {
2110 println!("š Binary conversion complete! Startup will be 5-15x faster next time.");
2111 println!("š Conversion summary:");
2112 for _conversion in &converted_files {
2113 println!("Loading complete");
2114 }
2115 }
2116
2117 Ok(converted_files)
2118 }
2119
    /// Whether LSH-based approximate similarity search is enabled.
    pub fn is_lsh_enabled(&self) -> bool {
        self.use_lsh
    }
2124
2125 pub fn lsh_stats(&self) -> Option<crate::lsh::LSHStats> {
2127 self.lsh_index.as_ref().map(|lsh| lsh.stats())
2128 }
2129
2130 pub fn enable_manifold_learning(&mut self, compression_ratio: f32) -> Result<(), String> {
2132 let input_dim = self.encoder.vector_size();
2133 let output_dim = ((input_dim as f32) / compression_ratio) as usize;
2134
2135 if output_dim == 0 {
2136 return Err("Compression ratio too high, output dimension would be 0".to_string());
2137 }
2138
2139 let mut learner = ManifoldLearner::new(input_dim, output_dim);
2140 learner.init_network()?;
2141
2142 self.manifold_learner = Some(learner);
2143 self.manifold_similarity_search = Some(SimilaritySearch::new(output_dim));
2144 self.use_manifold = false; Ok(())
2147 }
2148
    /// Train the manifold learner on every position currently in the
    /// similarity-search knowledge base, then rebuild the compressed
    /// indices and activate manifold-based search.
    ///
    /// # Errors
    /// Fails when manifold learning was not enabled, the knowledge base is
    /// empty, or training/re-indexing fails.
    pub fn train_manifold_learning(&mut self, epochs: usize) -> Result<(), String> {
        if self.manifold_learner.is_none() {
            return Err(
                "Manifold learning not enabled. Call enable_manifold_learning first.".to_string(),
            );
        }

        if self.similarity_search.size() == 0 {
            return Err("No positions in knowledge base to train on.".to_string());
        }

        let rows = self.similarity_search.size();
        let cols = self.encoder.vector_size();

        // Build the (positions x vector_size) training matrix; rows that
        // can't be fetched are zero-filled.
        let training_matrix = Array2::from_shape_fn((rows, cols), |(row, col)| {
            if let Some((vector, _)) = self.similarity_search.get_position_ref(row) {
                vector[col]
            } else {
                0.0
            }
        });

        if let Some(ref mut learner) = self.manifold_learner {
            learner.train(&training_matrix, epochs)?;
            let compression_ratio = learner.compression_ratio();

            // Explicitly end the mutable borrow of `self.manifold_learner`
            // so `rebuild_manifold_indices` can re-borrow `self`.
            let _ = learner;

            self.rebuild_manifold_indices()?;
            self.use_manifold = true;

            println!(
                "Manifold learning training completed. Compression ratio: {compression_ratio:.1}x"
            );
        }

        Ok(())
    }
2192
    /// Rebuild the compressed (manifold-space) similarity and LSH indices
    /// from scratch by re-encoding every position in the full-resolution
    /// knowledge base. No-op when no manifold learner exists.
    fn rebuild_manifold_indices(&mut self) -> Result<(), String> {
        if let Some(ref learner) = self.manifold_learner {
            let output_dim = learner.output_dim();
            // Reset both compressed indices to empty before re-populating.
            if let Some(ref mut search) = self.manifold_similarity_search {
                *search = SimilaritySearch::new(output_dim);
            }
            // NOTE(review): 8 tables / 16-bit hashes are hard-coded here,
            // discarding whatever parameters enable_manifold_lsh was given
            // — confirm this is intended.
            if let Some(ref mut lsh) = self.manifold_lsh_index {
                *lsh = LSH::new(output_dim, 8, 16); }

            // Re-encode every stored position into manifold space.
            for (vector, eval) in self.similarity_search.iter_positions() {
                let compressed = learner.encode(vector);

                if let Some(ref mut search) = self.manifold_similarity_search {
                    search.add_position(compressed.clone(), eval);
                }

                if let Some(ref mut lsh) = self.manifold_lsh_index {
                    lsh.add_vector(compressed, eval);
                }
            }
        }

        Ok(())
    }
2221
2222 pub fn enable_manifold_lsh(
2224 &mut self,
2225 num_tables: usize,
2226 hash_size: usize,
2227 ) -> Result<(), String> {
2228 if self.manifold_learner.is_none() {
2229 return Err("Manifold learning not enabled".to_string());
2230 }
2231
2232 let output_dim = self.manifold_learner.as_ref().unwrap().output_dim();
2233 self.manifold_lsh_index = Some(LSH::new(output_dim, num_tables, hash_size));
2234
2235 if self.use_manifold {
2237 self.rebuild_manifold_indices()?;
2238 }
2239
2240 Ok(())
2241 }
2242
    /// Whether manifold-compressed search is active (trained and enabled).
    pub fn is_manifold_enabled(&self) -> bool {
        self.use_manifold && self.manifold_learner.is_some()
    }
2247
2248 pub fn manifold_compression_ratio(&self) -> Option<f32> {
2250 self.manifold_learner
2251 .as_ref()
2252 .map(|l| l.compression_ratio())
2253 }
2254
    /// Load a pre-trained manifold learner from the attached database,
    /// rebuild the compressed indices, and activate manifold search.
    ///
    /// # Errors
    /// Fails when persistence is not enabled, no trained model is stored,
    /// or re-indexing fails.
    pub fn load_manifold_models(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        if let Some(ref db) = self.database {
            match crate::manifold_learner::ManifoldLearner::load_from_database(db)? {
                Some(learner) => {
                    let compression_ratio = learner.compression_ratio();
                    println!(
                        "š§ Loaded pre-trained manifold learner (compression: {compression_ratio:.1}x)"
                    );

                    // Activate immediately: the stored model is trained.
                    self.manifold_learner = Some(learner);
                    self.use_manifold = true;

                    self.rebuild_manifold_indices()?;

                    println!("ā
Manifold learning enabled with compressed vectors");
                    Ok(())
                }
                None => Err("No pre-trained manifold models found in database".into()),
            }
        } else {
            Err("Database not initialized - cannot load manifold models".into())
        }
    }
2282
    /// Enable the opening book, populated with the standard openings.
    pub fn enable_opening_book(&mut self) {
        self.opening_book = Some(OpeningBook::with_standard_openings());
    }
2287
    /// Replace the current opening book (if any) with `book`.
    pub fn set_opening_book(&mut self, book: OpeningBook) {
        self.opening_book = Some(book);
    }
2292
2293 pub fn is_opening_position(&self, board: &Board) -> bool {
2295 self.opening_book
2296 .as_ref()
2297 .map(|book| book.contains(board))
2298 .unwrap_or(false)
2299 }
2300
2301 pub fn get_opening_entry(&self, board: &Board) -> Option<&OpeningEntry> {
2303 self.opening_book.as_ref()?.lookup(board)
2304 }
2305
2306 pub fn opening_book_stats(&self) -> Option<OpeningBookStats> {
2308 self.opening_book.as_ref().map(|book| book.stats())
2309 }
2310
2311 pub fn add_position_with_move(
2313 &mut self,
2314 board: &Board,
2315 evaluation: f32,
2316 chess_move: Option<ChessMove>,
2317 move_outcome: Option<f32>,
2318 ) {
2319 let position_index = self.knowledge_base_size();
2320
2321 self.add_position(board, evaluation);
2323
2324 if let (Some(mov), Some(outcome)) = (chess_move, move_outcome) {
2326 self.position_moves
2327 .entry(position_index)
2328 .or_default()
2329 .push((mov, outcome));
2330 }
2331 }
2332
2333 pub fn recommend_moves(
2335 &mut self,
2336 board: &Board,
2337 num_recommendations: usize,
2338 ) -> Vec<MoveRecommendation> {
2339 if let Some(entry) = self.get_opening_entry(board) {
2353 let mut recommendations = Vec::new();
2354
2355 for (chess_move, strength) in &entry.best_moves {
2356 recommendations.push(MoveRecommendation {
2357 chess_move: *chess_move,
2358 confidence: strength * 0.9, from_similar_position_count: 1,
2360 average_outcome: entry.evaluation,
2361 });
2362 }
2363
2364 recommendations.sort_by(|a, b| {
2366 b.confidence
2367 .partial_cmp(&a.confidence)
2368 .unwrap_or(std::cmp::Ordering::Equal)
2369 });
2370 recommendations.truncate(num_recommendations);
2371 return recommendations;
2372 }
2373
2374 let similar_positions = self.find_similar_positions_with_indices(board, 20);
2376
2377 let mut move_data: HashMap<ChessMove, Vec<(f32, f32)>> = HashMap::new(); use chess::MoveGen;
2382 let legal_moves: Vec<ChessMove> = match std::panic::catch_unwind(|| {
2383 MoveGen::new_legal(board).collect::<Vec<ChessMove>>()
2384 }) {
2385 Ok(moves) => moves,
2386 Err(_) => {
2387 return Vec::new();
2389 }
2390 };
2391
2392 for (position_index, _eval, similarity) in similar_positions {
2394 if let Some(moves) = self.position_moves.get(&position_index) {
2395 for &(chess_move, outcome) in moves {
2396 if legal_moves.contains(&chess_move) {
2398 move_data
2399 .entry(chess_move)
2400 .or_default()
2401 .push((similarity, outcome));
2402 }
2403 }
2404 }
2405 }
2406
2407 if self.tactical_search.is_some() {
2409 if let Some(ref mut tactical_search) = self.tactical_search {
2410 let tactical_result = tactical_search.search(board);
2412
2413 if let Some(best_move) = tactical_result.best_move {
2415 let mut temp_board = *board;
2417 temp_board = temp_board.make_move_new(best_move);
2418 let move_evaluation = tactical_search.search(&temp_board).evaluation;
2419
2420 move_data.insert(best_move, vec![(0.75, move_evaluation)]);
2421 }
2422
2423 let mut ordered_moves = legal_moves.clone();
2426
2427 ordered_moves.sort_by(|a, b| {
2429 let a_is_capture = board.piece_on(a.get_dest()).is_some();
2430 let b_is_capture = board.piece_on(b.get_dest()).is_some();
2431
2432 match (a_is_capture, b_is_capture) {
2433 (true, false) => std::cmp::Ordering::Less, (false, true) => std::cmp::Ordering::Greater, _ => {
2436 let a_centrality = move_centrality(a);
2438 let b_centrality = move_centrality(b);
2439 b_centrality
2440 .partial_cmp(&a_centrality)
2441 .unwrap_or(std::cmp::Ordering::Equal)
2442 }
2443 }
2444 });
2445
2446 for chess_move in ordered_moves.into_iter() {
2449 move_data.entry(chess_move).or_insert_with(|| {
2450 let mut temp_board = *board;
2452 temp_board = temp_board.make_move_new(chess_move);
2453 let move_evaluation = tactical_search.search(&temp_board).evaluation;
2454
2455 vec![(0.6, move_evaluation)]
2456 });
2457 }
2458 } else {
2459 let mut ordered_moves = legal_moves.clone();
2462
2463 ordered_moves.sort_by(|a, b| {
2465 let a_is_capture = board.piece_on(a.get_dest()).is_some();
2466 let b_is_capture = board.piece_on(b.get_dest()).is_some();
2467
2468 match (a_is_capture, b_is_capture) {
2469 (true, false) => std::cmp::Ordering::Less,
2470 (false, true) => std::cmp::Ordering::Greater,
2471 _ => {
2472 let a_centrality = move_centrality(a);
2473 let b_centrality = move_centrality(b);
2474 b_centrality
2475 .partial_cmp(&a_centrality)
2476 .unwrap_or(std::cmp::Ordering::Equal)
2477 }
2478 }
2479 });
2480
2481 for chess_move in ordered_moves.into_iter().take(num_recommendations) {
2482 let mut basic_eval = 0.0;
2484
2485 if let Some(captured_piece) = board.piece_on(chess_move.get_dest()) {
2487 basic_eval += match captured_piece {
2488 chess::Piece::Pawn => 1.0,
2489 chess::Piece::Knight | chess::Piece::Bishop => 3.0,
2490 chess::Piece::Rook => 5.0,
2491 chess::Piece::Queen => 9.0,
2492 chess::Piece::King => 100.0, };
2494 }
2495
2496 move_data.insert(chess_move, vec![(0.3, basic_eval)]); }
2498 }
2499 }
2500
2501 let mut recommendations = Vec::new();
2503
2504 for (chess_move, outcomes) in move_data {
2505 if outcomes.is_empty() {
2506 continue;
2507 }
2508
2509 let mut weighted_sum = 0.0;
2511 let mut weight_sum = 0.0;
2512
2513 for &(similarity, outcome) in &outcomes {
2514 weighted_sum += similarity * outcome;
2515 weight_sum += similarity;
2516 }
2517
2518 let average_outcome = if weight_sum > 0.0 {
2519 weighted_sum / weight_sum
2520 } else {
2521 0.0
2522 };
2523
2524 let avg_similarity =
2526 outcomes.iter().map(|(s, _)| s).sum::<f32>() / outcomes.len() as f32;
2527 let position_count_bonus = (outcomes.len() as f32).ln().max(1.0) / 5.0; let confidence = (avg_similarity * 0.8 + position_count_bonus * 0.2).min(0.95); recommendations.push(MoveRecommendation {
2531 chess_move,
2532 confidence: confidence.min(1.0), from_similar_position_count: outcomes.len(),
2534 average_outcome,
2535 });
2536 }
2537
2538 recommendations.sort_by(|a, b| {
2541 match board.side_to_move() {
2542 chess::Color::White => {
2543 b.average_outcome
2545 .partial_cmp(&a.average_outcome)
2546 .unwrap_or(std::cmp::Ordering::Equal)
2547 }
2548 chess::Color::Black => {
2549 a.average_outcome
2551 .partial_cmp(&b.average_outcome)
2552 .unwrap_or(std::cmp::Ordering::Equal)
2553 }
2554 }
2555 });
2556
2557 recommendations.truncate(num_recommendations);
2559 recommendations
2560 }
2561
2562 pub fn recommend_legal_moves(
2564 &mut self,
2565 board: &Board,
2566 num_recommendations: usize,
2567 ) -> Vec<MoveRecommendation> {
2568 use chess::MoveGen;
2569
2570 let legal_moves: std::collections::HashSet<ChessMove> = MoveGen::new_legal(board).collect();
2572
2573 let all_recommendations = self.recommend_moves(board, num_recommendations * 2); all_recommendations
2577 .into_iter()
2578 .filter(|rec| legal_moves.contains(&rec.chess_move))
2579 .take(num_recommendations)
2580 .collect()
2581 }
2582
2583 pub fn enable_persistence<P: AsRef<Path>>(
2585 &mut self,
2586 db_path: P,
2587 ) -> Result<(), Box<dyn std::error::Error>> {
2588 let database = Database::new(db_path)?;
2589 self.database = Some(database);
2590 println!("Persistence enabled");
2591 Ok(())
2592 }
2593
    /// Persists the engine's in-memory knowledge base to the attached database.
    ///
    /// Builds one batch of `PositionData` rows (FEN + encoded vector +
    /// evaluation, all sharing a single timestamp), writes it in one call,
    /// then saves the LSH index and the manifold learner if present.
    ///
    /// # Errors
    /// Fails if persistence was never enabled, or on any database/clock error.
    pub fn save_to_database(&self) -> Result<(), Box<dyn std::error::Error>> {
        let db = self
            .database
            .as_ref()
            .ok_or("Database not enabled. Call enable_persistence() first.")?;

        println!("š¾ Saving engine state to database (batch mode)...");

        // One timestamp shared by every row written in this batch.
        let current_time = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)?
            .as_secs() as i64;

        let mut position_data_batch = Vec::with_capacity(self.position_boards.len());

        for (i, board) in self.position_boards.iter().enumerate() {
            // Guard against the parallel vectors being out of sync; rows
            // missing a vector or evaluation are silently skipped.
            if i < self.position_vectors.len() && i < self.position_evaluations.len() {
                // NOTE(review): as_slice() returns None for non-contiguous
                // arrays; this assumes every stored Array1 is contiguous —
                // confirm, otherwise this unwrap can panic.
                let vector = self.position_vectors[i].as_slice().unwrap();
                let position_data = PositionData {
                    fen: board.to_string(),
                    vector: vector.iter().map(|&x| x as f64).collect(),
                    evaluation: Some(self.position_evaluations[i] as f64),
                    compressed_vector: None, created_at: current_time,
                };
                position_data_batch.push(position_data);
            }
        }

        if !position_data_batch.is_empty() {
            let saved_count = db.save_positions_batch(&position_data_batch)?;
            println!("š Batch saved {saved_count} positions");
        }

        // Persist the LSH index configuration, if one has been built.
        if let Some(ref lsh) = self.lsh_index {
            lsh.save_to_database(db)?;
        }

        // Persist the manifold learner only once it has actually been trained.
        if let Some(ref learner) = self.manifold_learner {
            if learner.is_trained() {
                learner.save_to_database(db)?;
            }
        }

        println!("ā
Engine state saved successfully (batch optimized)");
        Ok(())
    }
2645
    /// Restores engine state (positions, LSH index, manifold learner) from the
    /// attached database, appending to the current in-memory knowledge base.
    ///
    /// # Errors
    /// Fails if persistence was never enabled, or on any database error.
    pub fn load_from_database(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        let db = self
            .database
            .as_ref()
            .ok_or("Database not enabled. Call enable_persistence() first.")?;

        println!("Loading engine state from database...");

        let positions = db.load_all_positions()?;
        for position_data in positions {
            // Rows whose FEN fails to parse are silently skipped.
            if let Ok(board) = Board::from_str(&position_data.fen) {
                let vector: Vec<f32> = position_data.vector.iter().map(|&x| x as f32).collect();
                let vector_array = Array1::from(vector);
                let mut evaluation = position_data.evaluation.unwrap_or(0.0) as f32;

                // Heuristic: magnitudes beyond 15 are presumably centipawn
                // values from older data and get rescaled to pawn units —
                // TODO confirm against the writers of this table.
                if evaluation.abs() > 15.0 {
                    evaluation /= 100.0;
                }

                self.similarity_search
                    .add_position(vector_array.clone(), evaluation);

                // Keep the parallel bookkeeping vectors in sync.
                self.position_vectors.push(vector_array);
                self.position_boards.push(board);
                self.position_evaluations.push(evaluation);
            }
        }

        if self.use_lsh {
            // Rebuild the LSH index from the freshly loaded positions.
            let positions_for_lsh: Vec<(Array1<f32>, f32)> = self
                .position_vectors
                .iter()
                .zip(self.position_evaluations.iter())
                .map(|(v, &e)| (v.clone(), e))
                .collect();

            match LSH::load_from_database(db, &positions_for_lsh)? {
                Some(lsh) => {
                    self.lsh_index = Some(lsh);
                    println!("Loaded LSH configuration from database");
                }
                None => {
                    println!("No LSH configuration found in database");
                }
            }
        }

        match ManifoldLearner::load_from_database(db)? {
            Some(learner) => {
                self.manifold_learner = Some(learner);
                if self.use_manifold {
                    // Recompute compressed indices to match the loaded learner.
                    self.rebuild_manifold_indices()?;
                }
                println!("Loaded manifold learner from database");
            }
            None => {
                println!("No manifold learner found in database");
            }
        }

        println!(
            "Engine state loaded successfully ({} positions)",
            self.knowledge_base_size()
        );
        Ok(())
    }
2721
2722 pub fn new_with_persistence<P: AsRef<Path>>(
2724 vector_size: usize,
2725 db_path: P,
2726 ) -> Result<Self, Box<dyn std::error::Error>> {
2727 let mut engine = Self::new(vector_size);
2728 engine.enable_persistence(db_path)?;
2729
2730 match engine.load_from_database() {
2732 Ok(_) => {
2733 println!("Loaded existing engine from database");
2734 }
2735 Err(e) => {
2736 println!("Starting fresh engine (load failed: {e})");
2737 }
2738 }
2739
2740 Ok(engine)
2741 }
2742
2743 pub fn auto_save(&self) -> Result<(), Box<dyn std::error::Error>> {
2745 if self.database.is_some() {
2746 self.save_to_database()?;
2747 }
2748 Ok(())
2749 }
2750
2751 pub fn is_persistence_enabled(&self) -> bool {
2753 self.database.is_some()
2754 }
2755
2756 pub fn database_position_count(&self) -> Result<i64, Box<dyn std::error::Error>> {
2758 let db = self.database.as_ref().ok_or("Database not enabled")?;
2759 Ok(db.get_position_count()?)
2760 }
2761
2762 pub fn enable_tactical_search(&mut self, config: TacticalConfig) {
2764 self.tactical_search = Some(TacticalSearch::new(config));
2765 }
2766
2767 pub fn enable_tactical_search_default(&mut self) {
2769 self.tactical_search = Some(TacticalSearch::new_default());
2770 }
2771
2772 pub fn configure_hybrid_evaluation(&mut self, config: HybridConfig) {
2774 self.hybrid_config = config;
2775 }
2776
2777 pub fn is_tactical_search_enabled(&self) -> bool {
2779 self.tactical_search.is_some()
2780 }
2781
2782 pub fn enable_parallel_search(&mut self, num_threads: usize) {
2784 if let Some(ref mut tactical_search) = self.tactical_search {
2785 tactical_search.config.enable_parallel_search = true;
2786 tactical_search.config.num_threads = num_threads;
2787 println!("š§µ Parallel tactical search enabled with {num_threads} threads");
2788 }
2789 }
2790
2791 pub fn is_parallel_search_enabled(&self) -> bool {
2793 self.tactical_search
2794 .as_ref()
2795 .map(|ts| ts.config.enable_parallel_search)
2796 .unwrap_or(false)
2797 }
2798
2799 pub fn hybrid_config(&self) -> &HybridConfig {
2820 &self.hybrid_config
2821 }
2822
2823 pub fn is_opening_book_enabled(&self) -> bool {
2825 self.opening_book.is_some()
2826 }
2827
2828 pub fn self_play_training(
2830 &mut self,
2831 config: training::SelfPlayConfig,
2832 ) -> Result<usize, Box<dyn std::error::Error>> {
2833 let mut trainer = training::SelfPlayTrainer::new(config);
2834 let new_data = trainer.generate_training_data(self);
2835
2836 let positions_added = new_data.data.len();
2837
2838 for data in &new_data.data {
2840 self.add_position(&data.board, data.evaluation);
2841 }
2842
2843 if self.database.is_some() {
2845 match self.save_to_database() {
2846 Ok(_) => println!("š¾ Saved {positions_added} positions to database"),
2847 Err(_e) => println!("Loading complete"),
2848 }
2849 }
2850
2851 println!("š§ Self-play training complete: {positions_added} new positions learned");
2852 Ok(positions_added)
2853 }
2854
2855 pub fn continuous_self_play(
2857 &mut self,
2858 config: training::SelfPlayConfig,
2859 iterations: usize,
2860 save_path: Option<&str>,
2861 ) -> Result<usize, Box<dyn std::error::Error>> {
2862 let mut total_positions = 0;
2863 let mut trainer = training::SelfPlayTrainer::new(config.clone());
2864
2865 println!("š Starting continuous self-play training for {iterations} iterations...");
2866
2867 for iteration in 1..=iterations {
2868 println!("\n--- Self-Play Iteration {iteration}/{iterations} ---");
2869
2870 let new_data = trainer.generate_training_data(self);
2872 let batch_size = new_data.data.len();
2873
2874 for data in &new_data.data {
2876 self.add_position(&data.board, data.evaluation);
2877 }
2878
2879 total_positions += batch_size;
2880
2881 println!(
2882 "ā
Iteration {}: Added {} positions (total: {})",
2883 iteration,
2884 batch_size,
2885 self.knowledge_base_size()
2886 );
2887
2888 if iteration % 5 == 0 || iteration == iterations {
2890 if let Some(path) = save_path {
2892 match self.save_training_data_binary(path) {
2893 Ok(_) => println!("š¾ Progress saved to {path} (binary format)"),
2894 Err(_e) => println!("Loading complete"),
2895 }
2896 }
2897
2898 if self.database.is_some() {
2900 match self.save_to_database() {
2901 Ok(_) => println!(
2902 "š¾ Database synchronized ({} total positions)",
2903 self.knowledge_base_size()
2904 ),
2905 Err(_e) => println!("Loading complete"),
2906 }
2907 }
2908 }
2909
2910 if iteration % 10 == 0
2912 && self.knowledge_base_size() > 5000
2913 && self.manifold_learner.is_some()
2914 {
2915 println!("š§ Retraining manifold learning with new data...");
2916 let _ = self.train_manifold_learning(5);
2917 }
2918 }
2919
2920 println!("\nš Continuous self-play complete: {total_positions} total new positions");
2921 Ok(total_positions)
2922 }
2923
2924 pub fn adaptive_self_play(
2926 &mut self,
2927 base_config: training::SelfPlayConfig,
2928 target_strength: f32,
2929 ) -> Result<usize, Box<dyn std::error::Error>> {
2930 let mut current_config = base_config;
2931 let mut total_positions = 0;
2932 let mut iteration = 1;
2933
2934 println!(
2935 "šÆ Starting adaptive self-play training (target strength: {target_strength:.2})..."
2936 );
2937
2938 loop {
2939 println!("\n--- Adaptive Iteration {iteration} ---");
2940
2941 let positions_added = self.self_play_training(current_config.clone())?;
2943 total_positions += positions_added;
2944
2945 if self.database.is_some() {
2947 match self.save_to_database() {
2948 Ok(_) => println!("š¾ Adaptive training progress saved to database"),
2949 Err(_e) => println!("Loading complete"),
2950 }
2951 }
2952
2953 let current_strength = self.knowledge_base_size() as f32 / 10000.0; println!(
2957 "š Current strength estimate: {current_strength:.2} (target: {target_strength:.2})"
2958 );
2959
2960 if current_strength >= target_strength {
2961 println!("š Target strength reached!");
2962 break;
2963 }
2964
2965 current_config.exploration_factor *= 0.95; current_config.temperature *= 0.98; current_config.games_per_iteration =
2969 (current_config.games_per_iteration as f32 * 1.1) as usize; iteration += 1;
2972
2973 if iteration > 50 {
2974 println!("ā ļø Maximum iterations reached");
2975 break;
2976 }
2977 }
2978
2979 Ok(total_positions)
2980 }
2981}
2982
/// Unit tests for `ChessVectorEngine`: knowledge-base bookkeeping, similarity
/// search, move recommendation, opening book, manifold/LSH integration,
/// persistence round-trips, and hybrid evaluation configuration.
#[cfg(test)]
mod tests {
    use super::*;
    use chess::Board;

    // A fresh engine starts with an empty knowledge base.
    #[test]
    fn test_engine_creation() {
        let engine = ChessVectorEngine::new(1024);
        assert_eq!(engine.knowledge_base_size(), 0);
    }

    // Adding a position grows the knowledge base and makes it findable.
    #[test]
    fn test_add_and_search() {
        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        engine.add_position(&board, 0.0);
        assert_eq!(engine.knowledge_base_size(), 1);

        let similar = engine.find_similar_positions(&board, 1);
        assert_eq!(similar.len(), 1);
    }

    // Evaluating a stored position returns (approximately) the stored score.
    #[test]
    fn test_evaluation() {
        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        engine.add_position(&board, 0.5);

        let evaluation = engine.evaluate_position(&board);
        assert!(evaluation.is_some());
        assert!((evaluation.unwrap() - 0.5).abs() < 1e-6);
    }

    // A stored move yields non-empty recommendations, both raw and
    // legality-filtered.
    #[test]
    fn test_move_recommendations() {
        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        use chess::ChessMove;
        use std::str::FromStr;
        let mov = ChessMove::from_str("e2e4").unwrap();
        engine.add_position_with_move(&board, 0.0, Some(mov), Some(0.8));

        let recommendations = engine.recommend_moves(&board, 3);
        assert!(!recommendations.is_empty());

        let legal_recommendations = engine.recommend_legal_moves(&board, 3);
        assert!(!legal_recommendations.is_empty());
    }

    // With no training data the engine must still fall back to legal-move
    // recommendations with neutral outcomes and nonzero confidence.
    #[test]
    fn test_empty_knowledge_base_fallback() {
        let mut engine = ChessVectorEngine::new(1024);

        use std::str::FromStr;
        let board =
            Board::from_str("r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 0 1")
                .unwrap();

        let recommendations = engine.recommend_moves(&board, 5);
        assert!(
            !recommendations.is_empty(),
            "recommend_moves should not return empty even with no training data"
        );
        assert_eq!(
            recommendations.len(),
            5,
            "Should return exactly 5 recommendations"
        );

        for rec in &recommendations {
            assert!(rec.confidence > 0.0, "Confidence should be greater than 0");
            assert_eq!(
                rec.from_similar_position_count, 1,
                "Should have count of 1 for fallback"
            );
            assert_eq!(rec.average_outcome, 0.0, "Should have neutral outcome");
        }

        let starting_board = Board::default();
        let starting_recommendations = engine.recommend_moves(&starting_board, 3);
        assert!(
            !starting_recommendations.is_empty(),
            "Should work for starting position too"
        );

        // Every fallback recommendation must be a legal move.
        use chess::MoveGen;
        let legal_moves: std::collections::HashSet<_> = MoveGen::new_legal(&board).collect();
        for rec in &recommendations {
            assert!(
                legal_moves.contains(&rec.chess_move),
                "All recommended moves should be legal"
            );
        }
    }

    // Opening book: detection, entry lookup, stats, and high-confidence
    // recommendations for the starting position.
    #[test]
    fn test_opening_book_integration() {
        let mut engine = ChessVectorEngine::new(1024);

        engine.enable_opening_book();
        assert!(engine.opening_book.is_some());

        let board = Board::default();
        assert!(engine.is_opening_position(&board));

        let entry = engine.get_opening_entry(&board);
        assert!(entry.is_some());

        let stats = engine.opening_book_stats();
        assert!(stats.is_some());
        assert!(stats.unwrap().total_positions > 0);

        let recommendations = engine.recommend_moves(&board, 3);
        assert!(!recommendations.is_empty());
        assert!(recommendations[0].confidence > 0.7); }

    // Manifold learning: compression ratio is honored and search still works
    // after training.
    #[test]
    fn test_manifold_learning_integration() {
        let mut engine = ChessVectorEngine::new(1024);

        let board = Board::default();
        for i in 0..10 {
            engine.add_position(&board, i as f32 * 0.1);
        }

        assert!(engine.enable_manifold_learning(8.0).is_ok());

        let ratio = engine.manifold_compression_ratio();
        assert!(ratio.is_some());
        assert!((ratio.unwrap() - 8.0).abs() < 0.1);

        assert!(engine.train_manifold_learning(5).is_ok());

        let original_similar = engine.find_similar_positions(&board, 3);
        assert!(!original_similar.is_empty());
    }

    // LSH: approximate search and evaluation still function once enabled.
    #[test]
    fn test_lsh_integration() {
        let mut engine = ChessVectorEngine::new(1024);

        let board = Board::default();
        for i in 0..50 {
            engine.add_position(&board, i as f32 * 0.02);
        }

        engine.enable_lsh(4, 8);

        let similar = engine.find_similar_positions(&board, 5);
        assert!(!similar.is_empty());
        assert!(similar.len() <= 5);

        let eval = engine.evaluate_position(&board);
        assert!(eval.is_some());
    }

    // Manifold + LSH combined: compressed-space LSH search still returns
    // results and recommendations don't panic.
    #[test]
    fn test_manifold_lsh_integration() {
        let mut engine = ChessVectorEngine::new(1024);

        let board = Board::default();
        for i in 0..20 {
            engine.add_position(&board, i as f32 * 0.05);
        }

        assert!(engine.enable_manifold_learning(8.0).is_ok());
        assert!(engine.train_manifold_learning(3).is_ok());

        assert!(engine.enable_manifold_lsh(4, 8).is_ok());

        let similar = engine.find_similar_positions(&board, 3);
        assert!(!similar.is_empty());

        let _recommendations = engine.recommend_moves(&board, 2);
    }

    // Moves stored alongside positions are tracked and surfaced as
    // recommendations.
    #[test]
    fn test_position_with_move_storage() {
        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        use chess::ChessMove;
        use std::str::FromStr;
        let move1 = ChessMove::from_str("e2e4").unwrap();
        let move2 = ChessMove::from_str("d2d4").unwrap();

        engine.add_position_with_move(&board, 0.0, Some(move1), Some(0.7));
        engine.add_position_with_move(&board, 0.1, Some(move2), Some(0.6));

        assert_eq!(engine.position_moves.len(), 2);

        let recommendations = engine.recommend_moves(&board, 5);
        let _move_strings: Vec<String> = recommendations
            .iter()
            .map(|r| r.chess_move.to_string())
            .collect();

        assert!(!recommendations.is_empty());
    }

    // Coarse performance regression guard; thresholds are deliberately loose
    // so slow CI machines don't flake.
    #[test]
    fn test_performance_regression_basic() {
        use std::time::Instant;

        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        for i in 0..100 {
            engine.add_position(&board, i as f32 * 0.01);
        }

        let start = Instant::now();

        for _ in 0..100 {
            engine.add_position(&board, 0.0);
        }

        let encoding_time = start.elapsed();

        let start = Instant::now();
        for _ in 0..10 {
            engine.find_similar_positions(&board, 5);
        }
        let search_time = start.elapsed();

        assert!(
            encoding_time.as_millis() < 10000,
            "Position encoding too slow: {}ms",
            encoding_time.as_millis()
        );
        assert!(
            search_time.as_millis() < 5000,
            "Search too slow: {}ms",
            search_time.as_millis()
        );
    }

    // Knowledge base grows by exactly the number of added positions
    // (duplicates are not deduplicated on add).
    #[test]
    fn test_memory_usage_reasonable() {
        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        let initial_size = engine.knowledge_base_size();

        for i in 0..1000 {
            engine.add_position(&board, i as f32 * 0.001);
        }

        let final_size = engine.knowledge_base_size();
        assert_eq!(final_size, initial_size + 1000);

        assert!(final_size > initial_size);
    }

    // Incremental dataset training deduplicates against existing positions
    // and updates the training stats.
    #[test]
    fn test_incremental_training() {
        use std::str::FromStr;

        let mut engine = ChessVectorEngine::new(1024);
        let board1 = Board::default();
        let board2 =
            Board::from_str("rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1").unwrap();

        engine.add_position(&board1, 0.0);
        engine.add_position(&board2, 0.2);
        assert_eq!(engine.knowledge_base_size(), 2);

        // Dataset contains one duplicate (board1) and one new position.
        let mut dataset = crate::training::TrainingDataset::new();
        dataset.add_position(board1, 0.1, 15, 1); dataset.add_position(
            Board::from_str("rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2")
                .unwrap(),
            0.3,
            15,
            2,
        ); engine.train_from_dataset_incremental(&dataset);

        // Only the genuinely new position should have been added.
        assert_eq!(engine.knowledge_base_size(), 3);

        let stats = engine.training_stats();
        assert_eq!(stats.total_positions, 3);
        assert_eq!(stats.unique_positions, 3);
        assert!(!stats.has_move_data); }

    // Training data saved by one engine can be merged incrementally into
    // another without losing the target's own positions.
    #[test]
    fn test_save_load_incremental() {
        use std::str::FromStr;
        use tempfile::tempdir;

        let temp_dir = tempdir().unwrap();
        let file_path = temp_dir.path().join("test_training.json");

        let mut engine1 = ChessVectorEngine::new(1024);
        engine1.add_position(&Board::default(), 0.0);
        engine1.add_position(
            &Board::from_str("rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1").unwrap(),
            0.2,
        );

        engine1.save_training_data(&file_path).unwrap();

        let mut engine2 = ChessVectorEngine::new(1024);
        engine2.add_position(
            &Board::from_str("rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2")
                .unwrap(),
            0.3,
        );
        assert_eq!(engine2.knowledge_base_size(), 1);

        engine2.load_training_data_incremental(&file_path).unwrap();

        // 1 existing + 2 loaded = 3 total.
        assert_eq!(engine2.knowledge_base_size(), 3);
    }

    // training_stats reflects position counts, move data, and which
    // acceleration features are enabled.
    #[test]
    fn test_training_stats() {
        use std::str::FromStr;

        let mut engine = ChessVectorEngine::new(1024);

        // Fresh engine: everything zero / disabled.
        let stats = engine.training_stats();
        assert_eq!(stats.total_positions, 0);
        assert_eq!(stats.unique_positions, 0);
        assert!(!stats.has_move_data);
        assert!(!stats.lsh_enabled);
        assert!(!stats.manifold_enabled);
        assert!(!stats.opening_book_enabled);

        engine.add_position(&Board::default(), 0.0);
        engine.add_position_with_move(
            &Board::default(),
            0.1,
            Some(ChessMove::from_str("e2e4").unwrap()),
            Some(0.8),
        );

        engine.enable_opening_book();
        engine.enable_lsh(4, 8);

        let stats = engine.training_stats();
        assert_eq!(stats.total_positions, 2);
        assert!(stats.has_move_data);
        assert!(stats.move_data_entries > 0);
        assert!(stats.lsh_enabled);
        assert!(stats.opening_book_enabled);
    }

    // Tactical search toggling and evaluation with/without stored patterns.
    #[test]
    fn test_tactical_search_integration() {
        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        assert!(!engine.is_tactical_search_enabled());

        engine.enable_tactical_search_default();
        assert!(engine.is_tactical_search_enabled());

        // Evaluation must work even before any position is stored.
        let evaluation = engine.evaluate_position(&board);
        assert!(evaluation.is_some());

        engine.add_position(&board, 0.5);
        let hybrid_evaluation = engine.evaluate_position(&board);
        assert!(hybrid_evaluation.is_some());
    }

    // Custom hybrid configs (with and without tactical refinement) both
    // produce evaluations.
    #[test]
    fn test_hybrid_evaluation_configuration() {
        let mut engine = ChessVectorEngine::new(1024);
        let board = Board::default();

        engine.enable_tactical_search_default();

        let custom_config = HybridConfig {
            pattern_confidence_threshold: 0.9, enable_tactical_refinement: true,
            tactical_config: TacticalConfig::default(),
            pattern_weight: 0.8,
            min_similar_positions: 5,
        };

        engine.configure_hybrid_evaluation(custom_config);

        engine.add_position(&board, 0.3);

        let evaluation = engine.evaluate_position(&board);
        assert!(evaluation.is_some());

        // Disable tactical refinement: pattern-only evaluation must still work.
        let no_tactical_config = HybridConfig {
            enable_tactical_refinement: false,
            ..HybridConfig::default()
        };

        engine.configure_hybrid_evaluation(no_tactical_config);

        let pattern_only_evaluation = engine.evaluate_position(&board);
        assert!(pattern_only_evaluation.is_some());
    }
}