1use crate::{EmbeddingError, EmbeddingModel, ModelConfig, Vector};
15use anyhow::Result;
16use async_trait::async_trait;
17use scirs2_core::ndarray_ext::Array2;
18use scirs2_core::random::{Random, Rng};
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21use uuid::Uuid;
22
/// Tunable parameters for the biological-computing embedding components.
///
/// The values are read by `BiologicalEmbeddingModel::new` and its helper
/// methods; `Default` supplies the stock settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BiologicalComputingConfig {
    /// Number of nucleotides in an encoded entity sequence
    /// (`encode_entity_dna` pads or truncates to this length).
    pub dna_sequence_length: usize,
    /// Grid shape handed to `CellularAutomaton::new`.
    pub ca_grid_size: (usize, usize),
    /// Number of `evolve` calls performed by `generate_ca_embedding`.
    pub ca_evolution_steps: usize,
    /// Reaction rate handed to `EnzymaticNetwork::new`.
    pub enzyme_reaction_rate: f64,
    /// Regulation strength handed to `GeneRegulatoryNetwork::new`.
    pub gene_regulation_strength: f64,
    /// Temperature handed to both `EnzymaticNetwork::new` and `MolecularAssembly::new`.
    // NOTE(review): the default of 300.0 suggests Kelvin — confirm.
    pub assembly_temperature: f64,
    /// Per-nucleotide probability passed to `DNASequence::mutate` during training.
    pub mutation_rate: f64,
    /// Population size.
    // NOTE(review): not read by any code visible in this part of the file.
    pub population_size: usize,
    /// Number of generations.
    // NOTE(review): not read by any code visible in this part of the file.
    pub num_generations: usize,
    /// Update rule handed to `CellularAutomaton::new`.
    pub ca_rule: CellularAutomataRule,
    /// Selected DNA computing method.
    // NOTE(review): not read by any code visible in this part of the file.
    pub dna_method: DNAComputingMethod,
    /// Assembly mode handed to `MolecularAssembly::new`.
    pub assembly_type: MolecularAssemblyType,
}
51
52impl Default for BiologicalComputingConfig {
53 fn default() -> Self {
54 Self {
55 dna_sequence_length: 256,
56 ca_grid_size: (64, 64),
57 ca_evolution_steps: 100,
58 enzyme_reaction_rate: 0.1,
59 gene_regulation_strength: 0.5,
60 assembly_temperature: 300.0, mutation_rate: 0.01,
62 population_size: 100,
63 num_generations: 50,
64 ca_rule: CellularAutomataRule::Conway,
65 dna_method: DNAComputingMethod::Hybridization,
66 assembly_type: MolecularAssemblyType::SelfAssembly,
67 }
68 }
69}
70
/// Update rule applied by `CellularAutomaton::evolve`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CellularAutomataRule {
    /// Dispatches to `evolve_conway`.
    Conway,
    /// Dispatches to `evolve_elementary(30)`.
    Elementary30,
    /// Dispatches to `evolve_elementary(110)`.
    Elementary110,
    /// Dispatches to `evolve_langton`.
    Langton,
    /// Dispatches to `evolve_custom` with the contained lookup table.
    Custom(Vec<u8>),
}
80
/// DNA computing technique selected through `BiologicalComputingConfig::dna_method`.
// NOTE(review): no code visible in this part of the file branches on the variant,
// so the per-variant behaviour cannot be documented from here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DNAComputingMethod {
    Hybridization,
    PCR,
    Ligation,
    Restriction,
    CRISPR,
}
90
/// Assembly mode stored on `MolecularAssembly::assembly_type`.
// NOTE(review): no code visible in this part of the file branches on the variant,
// so the per-variant behaviour cannot be documented from here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MolecularAssemblyType {
    SelfAssembly,
    TemplateDirected,
    Hierarchical,
    DynamicAssembly,
}
99
/// One of the four DNA bases.
///
/// `complement` pairs `A` with `T` and `G` with `C`; `to_numeric` encodes the
/// variants as 0, 1, 2 and 3 in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Nucleotide {
    A,
    T,
    G,
    C,
}
108
109impl Nucleotide {
110 pub fn complement(&self) -> Self {
112 match self {
113 Nucleotide::A => Nucleotide::T,
114 Nucleotide::T => Nucleotide::A,
115 Nucleotide::G => Nucleotide::C,
116 Nucleotide::C => Nucleotide::G,
117 }
118 }
119
120 pub fn to_numeric(&self) -> f64 {
122 match self {
123 Nucleotide::A => 0.0,
124 Nucleotide::T => 1.0,
125 Nucleotide::G => 2.0,
126 Nucleotide::C => 3.0,
127 }
128 }
129
130 pub fn from_numeric(value: f64) -> Self {
132 match (value % 4.0) as u8 {
133 0 => Nucleotide::A,
134 1 => Nucleotide::T,
135 2 => Nucleotide::G,
136 3 => Nucleotide::C,
137 _ => Nucleotide::A,
138 }
139 }
140
141 pub fn random(rng: &mut impl Rng) -> Self {
143 match rng.random_range(0..4) {
144 0 => Nucleotide::A,
145 1 => Nucleotide::T,
146 2 => Nucleotide::G,
147 3 => Nucleotide::C,
148 _ => unreachable!(),
149 }
150 }
151}
152
/// A DNA strand stored as an ordered list of nucleotides.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DNASequence {
    /// Bases in strand order.
    pub sequence: Vec<Nucleotide>,
    /// Number of bases; `DNASequence::new` sets it to `sequence.len()`.
    // NOTE(review): both fields are public, so this can drift from
    // `sequence.len()` if `sequence` is mutated directly.
    pub length: usize,
}
159
160impl DNASequence {
161 pub fn new(sequence: Vec<Nucleotide>) -> Self {
163 let length = sequence.len();
164 Self { sequence, length }
165 }
166
167 pub fn random(length: usize, rng: &mut impl Rng) -> Self {
169 let sequence = (0..length).map(|_| Nucleotide::random(rng)).collect();
170 Self::new(sequence)
171 }
172
173 pub fn complement(&self) -> Self {
175 let complement_seq = self.sequence.iter().map(|n| n.complement()).collect();
176 Self::new(complement_seq)
177 }
178
179 pub fn hybridize(&self, other: &Self) -> f64 {
181 if self.length != other.length {
182 return 0.0;
183 }
184
185 let matches = self
186 .sequence
187 .iter()
188 .zip(other.sequence.iter())
189 .filter(|(a, b)| *a == &b.complement())
190 .count();
191
192 matches as f64 / self.length as f64
193 }
194
195 pub fn mutate(&mut self, mutation_rate: f64, rng: &mut impl Rng) {
197 for nucleotide in &mut self.sequence {
198 if rng.random_bool(mutation_rate) {
199 *nucleotide = Nucleotide::random(rng);
200 }
201 }
202 }
203
204 pub fn to_vector(&self) -> Vector {
206 let values: Vec<f32> = self
207 .sequence
208 .iter()
209 .map(|n| n.to_numeric() as f32)
210 .collect();
211 Vector::new(values)
212 }
213
214 pub fn from_vector(vector: &Vector) -> Self {
216 let sequence = vector
217 .values
218 .iter()
219 .map(|&v| Nucleotide::from_numeric(v as f64))
220 .collect();
221 Self::new(sequence)
222 }
223
224 pub fn pcr_amplify(&self, cycles: usize, efficiency: f64) -> Vec<Self> {
226 let mut population = vec![self.clone()];
227
228 for _ in 0..cycles {
229 let current_size = population.len();
230 let new_copies = (current_size as f64 * efficiency) as usize;
231
232 for _ in 0..new_copies {
233 if let Some(template) = population.first() {
234 population.push(template.clone());
235 }
236 }
237 }
238
239 population
240 }
241
242 pub fn restrict(&self, cut_site: &[Nucleotide]) -> Vec<Self> {
244 let mut fragments = Vec::new();
245 let mut current_fragment = Vec::new();
246
247 for i in 0..self.sequence.len() {
248 current_fragment.push(self.sequence[i]);
249
250 if i + 1 >= cut_site.len() && !cut_site.is_empty() {
252 let start = (i + 1).saturating_sub(cut_site.len());
253 if start <= i && start < self.sequence.len() {
254 let window = &self.sequence[start..=i];
255 if window == cut_site {
256 fragments.push(Self::new(current_fragment.clone()));
257 current_fragment.clear();
258 }
259 }
260 }
261 }
262
263 if !current_fragment.is_empty() {
265 fragments.push(Self::new(current_fragment));
266 }
267
268 fragments
269 }
270
271 pub fn ligate(&self, other: &Self) -> Self {
273 let mut combined = self.sequence.clone();
274 combined.extend_from_slice(&other.sequence);
275 Self::new(combined)
276 }
277}
278
/// A single cell of the cellular automaton grid.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct Cell {
    /// Discrete state; any non-zero value counts as alive (see `is_alive`).
    pub state: u8,
    /// This cell's share of the embedding read out by `extract_embedding`.
    pub embedding_component: f32,
    /// Energy level; `update_energy` keeps it within `[0.0, 2.0]`.
    pub energy: f32,
    /// Counter incremented by `age_cell`.
    pub age: usize,
}
287
288impl Cell {
289 pub fn new(state: u8, embedding_component: f32) -> Self {
290 Self {
291 state,
292 embedding_component,
293 energy: 1.0,
294 age: 0,
295 }
296 }
297
298 pub fn is_alive(&self) -> bool {
299 self.state > 0
300 }
301
302 pub fn update_energy(&mut self, delta: f32) {
303 self.energy = (self.energy + delta).clamp(0.0, 2.0);
304 }
305
306 pub fn age_cell(&mut self) {
307 self.age += 1;
308 }
309}
310
/// A two-dimensional cellular automaton whose cells also carry embedding values.
#[derive(Debug, Clone)]
pub struct CellularAutomaton {
    /// Current generation of cells, indexed as `[[row, col]]`.
    pub grid: Array2<Cell>,
    /// Update rule applied by `evolve`.
    pub rule: CellularAutomataRule,
    /// Number of `evolve` calls performed so far.
    pub generation: usize,
    /// Grid shape as `(rows, cols)`, as passed to `new`.
    pub size: (usize, usize),
}
319
320impl CellularAutomaton {
321 pub fn new(size: (usize, usize), rule: CellularAutomataRule) -> Self {
323 let grid = Array2::from_shape_fn(size, |(i, j)| {
324 Cell::new(0, (i as f32 + j as f32) / (size.0 + size.1) as f32)
325 });
326
327 Self {
328 grid,
329 rule,
330 generation: 0,
331 size,
332 }
333 }
334
335 pub fn initialize_with_embedding(&mut self, embedding: &Vector) {
337 let total_cells = self.size.0 * self.size.1;
338 let _chunk_size = embedding.values.len() / total_cells.min(embedding.values.len());
339
340 for ((i, j), cell) in self.grid.indexed_iter_mut() {
341 let flat_index = i * self.size.1 + j;
342 if flat_index < embedding.values.len() {
343 cell.embedding_component = embedding.values[flat_index];
344 cell.state = if embedding.values[flat_index] > 0.5 {
345 1
346 } else {
347 0
348 };
349 }
350 }
351 }
352
353 pub fn evolve(&mut self) {
355 let new_grid = match &self.rule {
356 CellularAutomataRule::Conway => self.evolve_conway(),
357 CellularAutomataRule::Elementary30 => self.evolve_elementary(30),
358 CellularAutomataRule::Elementary110 => self.evolve_elementary(110),
359 CellularAutomataRule::Langton => self.evolve_langton(),
360 CellularAutomataRule::Custom(rule_table) => self.evolve_custom(rule_table),
361 };
362
363 self.grid = new_grid;
364 self.generation += 1;
365 }
366
367 fn evolve_conway(&self) -> Array2<Cell> {
369 let mut new_grid = self.grid.clone();
370
371 for ((i, j), cell) in new_grid.indexed_iter_mut() {
372 let neighbors = self.count_neighbors(i, j);
373 let current_state = self.grid[[i, j]].state;
374
375 let new_state = match (current_state, neighbors) {
376 (1, 2) | (1, 3) => 1, (0, 3) => 1, _ => 0, };
380
381 cell.state = new_state;
382
383 let neighbor_sum = self.get_neighbor_embedding_sum(i, j);
385 cell.embedding_component = (cell.embedding_component + neighbor_sum * 0.1) / 2.0;
386
387 cell.age_cell();
388 }
389
390 new_grid
391 }
392
393 fn evolve_elementary(&self, rule_number: u8) -> Array2<Cell> {
395 let mut new_grid = self.grid.clone();
396
397 for i in 0..self.size.0 {
399 for j in 0..self.size.1 {
400 let left = if j > 0 {
401 self.grid[[i, j - 1]].state
402 } else {
403 0
404 };
405 let center = self.grid[[i, j]].state;
406 let right = if j < self.size.1 - 1 {
407 self.grid[[i, j + 1]].state
408 } else {
409 0
410 };
411
412 let pattern = (left << 2) | (center << 1) | right;
413 let new_state = (rule_number >> pattern) & 1;
414
415 new_grid[[i, j]].state = new_state;
416
417 let influence = (left as f32 + center as f32 + right as f32) / 3.0;
419 new_grid[[i, j]].embedding_component =
420 (new_grid[[i, j]].embedding_component + influence * 0.1) / 2.0;
421 }
422 }
423
424 new_grid
425 }
426
427 fn evolve_langton(&self) -> Array2<Cell> {
429 let mut new_grid = self.grid.clone();
430
431 for ((i, j), cell) in new_grid.indexed_iter_mut() {
433 let neighbors = self.count_neighbors(i, j);
434
435 let new_state = match (cell.state, neighbors % 4) {
437 (0, 0) | (0, 2) => 1,
438 (1, 1) | (1, 3) => 0,
439 _ => cell.state,
440 };
441
442 cell.state = new_state;
443
444 let direction_factor = (i as f32 - j as f32) / self.size.0 as f32;
446 cell.embedding_component = (cell.embedding_component + direction_factor * 0.05).tanh();
447 }
448
449 new_grid
450 }
451
452 fn evolve_custom(&self, rule_table: &[u8]) -> Array2<Cell> {
454 let mut new_grid = self.grid.clone();
455
456 for ((i, j), cell) in new_grid.indexed_iter_mut() {
457 let neighbors = self.count_neighbors(i, j);
458 let current_state = self.grid[[i, j]].state;
459
460 let rule_index = (current_state as usize * 9 + neighbors).min(rule_table.len() - 1);
461 cell.state = rule_table[rule_index];
462
463 let rule_influence = rule_table[rule_index] as f32 / 255.0;
465 cell.embedding_component = (cell.embedding_component + rule_influence * 0.1) / 2.0;
466 }
467
468 new_grid
469 }
470
471 fn count_neighbors(&self, row: usize, col: usize) -> usize {
473 let mut count = 0;
474
475 for di in -1i32..=1 {
476 for dj in -1i32..=1 {
477 if di == 0 && dj == 0 {
478 continue;
479 }
480
481 let ni = row as i32 + di;
482 let nj = col as i32 + dj;
483
484 if ni >= 0
485 && ni < self.size.0 as i32
486 && nj >= 0
487 && nj < self.size.1 as i32
488 && self.grid[[ni as usize, nj as usize]].is_alive()
489 {
490 count += 1;
491 }
492 }
493 }
494
495 count
496 }
497
498 fn get_neighbor_embedding_sum(&self, row: usize, col: usize) -> f32 {
500 let mut sum = 0.0;
501 let mut count = 0;
502
503 for di in -1i32..=1 {
504 for dj in -1i32..=1 {
505 if di == 0 && dj == 0 {
506 continue;
507 }
508
509 let ni = row as i32 + di;
510 let nj = col as i32 + dj;
511
512 if ni >= 0 && ni < self.size.0 as i32 && nj >= 0 && nj < self.size.1 as i32 {
513 sum += self.grid[[ni as usize, nj as usize]].embedding_component;
514 count += 1;
515 }
516 }
517 }
518
519 if count > 0 {
520 sum / count as f32
521 } else {
522 0.0
523 }
524 }
525
526 pub fn extract_embedding(&self) -> Vector {
528 let values: Vec<f32> = self
529 .grid
530 .iter()
531 .map(|cell| cell.embedding_component)
532 .collect();
533 Vector::new(values)
534 }
535
536 pub fn get_statistics(&self) -> CAStatistics {
538 let total_cells = self.grid.len();
539 let living_cells = self.grid.iter().filter(|cell| cell.is_alive()).count();
540 let total_energy: f32 = self.grid.iter().map(|cell| cell.energy).sum();
541 let avg_age: f32 =
542 self.grid.iter().map(|cell| cell.age as f32).sum::<f32>() / total_cells as f32;
543 let embedding_variance = self.calculate_embedding_variance();
544
545 CAStatistics {
546 generation: self.generation,
547 living_cells,
548 total_cells,
549 density: living_cells as f32 / total_cells as f32,
550 total_energy,
551 average_age: avg_age,
552 embedding_variance,
553 }
554 }
555
556 fn calculate_embedding_variance(&self) -> f32 {
557 let values: Vec<f32> = self
558 .grid
559 .iter()
560 .map(|cell| cell.embedding_component)
561 .collect();
562 let mean = values.iter().sum::<f32>() / values.len() as f32;
563 let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f32>() / values.len() as f32;
564 variance
565 }
566}
567
/// Snapshot of a `CellularAutomaton`, produced by `get_statistics`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CAStatistics {
    /// Generation counter at the time of the snapshot.
    pub generation: usize,
    /// Number of cells for which `is_alive` is true.
    pub living_cells: usize,
    /// Total number of cells in the grid.
    pub total_cells: usize,
    /// `living_cells / total_cells`.
    pub density: f32,
    /// Sum of all cells' energy.
    pub total_energy: f32,
    /// Mean cell age.
    pub average_age: f32,
    /// Variance of the cells' embedding components.
    pub embedding_variance: f32,
}
579
/// A set of enzymes, substrates and reactions simulated step by step.
#[derive(Debug, Clone)]
pub struct EnzymaticNetwork {
    /// Enzymes, looked up by id when a reaction is processed.
    pub enzymes: Vec<Enzyme>,
    /// Substrates; their `embedding_contribution` values form the extracted embedding.
    pub substrates: Vec<Substrate>,
    /// Reactions attempted once per simulation step.
    pub reactions: Vec<Reaction>,
    /// Global factor in each reaction's firing probability.
    pub reaction_rate: f64,
    /// Temperature supplied at construction.
    // NOTE(review): not read by any method visible in this part of the file.
    pub temperature: f64,
}
589
590impl EnzymaticNetwork {
591 pub fn new(reaction_rate: f64, temperature: f64) -> Self {
592 Self {
593 enzymes: Vec::new(),
594 substrates: Vec::new(),
595 reactions: Vec::new(),
596 reaction_rate,
597 temperature,
598 }
599 }
600
601 pub fn add_enzyme(&mut self, enzyme: Enzyme) {
603 self.enzymes.push(enzyme);
604 }
605
606 pub fn add_substrate(&mut self, substrate: Substrate) {
608 self.substrates.push(substrate);
609 }
610
611 pub fn simulate_reactions(&mut self, steps: usize) -> Vec<Vector> {
613 let mut results = Vec::new();
614
615 for _ in 0..steps {
616 let reactions = self.reactions.clone();
618 for reaction in &reactions {
619 self.process_reaction(reaction);
620 }
621
622 let embedding = self.extract_embedding();
624 results.push(embedding);
625 }
626
627 results
628 }
629
630 fn process_reaction(&mut self, reaction: &Reaction) {
631 let enzyme_efficiency = self.calculate_enzyme_efficiency(&reaction.enzyme_id);
633 let substrate_concentration = self.get_substrate_concentration(&reaction.substrate_id);
634
635 let reaction_probability = enzyme_efficiency * substrate_concentration * self.reaction_rate;
636
637 let mut random = Random::default();
638 let should_react = random.random::<f64>() < reaction_probability;
639
640 if should_react {
641 if let Some(substrate) = self
643 .substrates
644 .iter_mut()
645 .find(|s| s.id == reaction.substrate_id)
646 {
647 substrate.concentration *= 0.95; substrate.embedding_contribution *= reaction.rate_constant;
649 }
650 }
651 }
652
653 fn calculate_enzyme_efficiency(&self, enzyme_id: &Uuid) -> f64 {
654 self.enzymes
655 .iter()
656 .find(|e| e.id == *enzyme_id)
657 .map(|e| e.efficiency)
658 .unwrap_or(0.0)
659 }
660
661 fn get_substrate_concentration(&self, substrate_id: &Uuid) -> f64 {
662 self.substrates
663 .iter()
664 .find(|s| s.id == *substrate_id)
665 .map(|s| s.concentration)
666 .unwrap_or(0.0)
667 }
668
669 fn extract_embedding(&self) -> Vector {
670 let values: Vec<f32> = self
671 .substrates
672 .iter()
673 .map(|s| s.embedding_contribution as f32)
674 .collect();
675 Vector::new(values)
676 }
677}
678
/// An enzyme taking part in an `EnzymaticNetwork`.
#[derive(Debug, Clone)]
pub struct Enzyme {
    /// Identifier referenced by `Reaction::enzyme_id`.
    pub id: Uuid,
    /// Human-readable name.
    pub name: String,
    /// Factor in the firing probability of reactions using this enzyme.
    pub efficiency: f64,
    /// Specificity; `Enzyme::new` sets it to 0.8.
    pub specificity: f64,
    /// Optimal temperature; `Enzyme::new` sets it to 310.0.
    // NOTE(review): presumably Kelvin — confirm.
    pub optimal_temperature: f64,
    /// Optimal pH; `Enzyme::new` sets it to 7.4.
    pub optimal_ph: f64,
}
689
690impl Enzyme {
691 pub fn new(name: String, efficiency: f64) -> Self {
692 Self {
693 id: Uuid::new_v4(),
694 name,
695 efficiency,
696 specificity: 0.8,
697 optimal_temperature: 310.0, optimal_ph: 7.4,
699 }
700 }
701}
702
/// A substrate consumed by reactions in an `EnzymaticNetwork`.
#[derive(Debug, Clone)]
pub struct Substrate {
    /// Identifier referenced by `Reaction::substrate_id`.
    pub id: Uuid,
    /// Human-readable name.
    pub name: String,
    /// Current concentration; reduced by 5% each time a reaction on it fires.
    pub concentration: f64,
    /// Value this substrate contributes to the network's embedding.
    pub embedding_contribution: f64,
    /// Molecular weight; `Substrate::new` sets it to 100.0.
    pub molecular_weight: f64,
}
712
713impl Substrate {
714 pub fn new(name: String, concentration: f64, embedding_contribution: f64) -> Self {
715 Self {
716 id: Uuid::new_v4(),
717 name,
718 concentration,
719 embedding_contribution,
720 molecular_weight: 100.0, }
722 }
723}
724
/// A reaction in which an enzyme acts on a substrate.
#[derive(Debug, Clone)]
pub struct Reaction {
    /// Identifier of this reaction.
    pub id: Uuid,
    /// Id of the enzyme whose efficiency scales the firing probability.
    pub enzyme_id: Uuid,
    /// Id of the substrate that is consumed when the reaction fires.
    pub substrate_id: Uuid,
    /// Id of the product.
    // NOTE(review): not read by any code visible in this part of the file.
    pub product_id: Uuid,
    /// Factor applied to the substrate's embedding contribution when the reaction fires.
    pub rate_constant: f64,
    /// Activation energy.
    // NOTE(review): not read by any code visible in this part of the file.
    pub activation_energy: f64,
}
735
/// Genes plus the regulatory relationships between them, simulated step by step.
#[derive(Debug, Clone)]
pub struct GeneRegulatoryNetwork {
    /// Genes in insertion order; this order defines the embedding layout.
    pub genes: Vec<Gene>,
    /// Directed regulator → target relationships.
    pub regulatory_relationships: Vec<RegulatoryRelationship>,
    /// Current expression level per gene id; seeded with the gene's basal
    /// expression by `add_gene`.
    pub expression_levels: HashMap<Uuid, f64>,
    /// Global multiplier applied to the summed regulation effect on each update.
    pub regulation_strength: f64,
}
744
745impl GeneRegulatoryNetwork {
746 pub fn new(regulation_strength: f64) -> Self {
747 Self {
748 genes: Vec::new(),
749 regulatory_relationships: Vec::new(),
750 expression_levels: HashMap::new(),
751 regulation_strength,
752 }
753 }
754
755 pub fn add_gene(&mut self, gene: Gene) {
757 self.expression_levels
758 .insert(gene.id, gene.basal_expression);
759 self.genes.push(gene);
760 }
761
762 pub fn add_regulation(&mut self, relationship: RegulatoryRelationship) {
764 self.regulatory_relationships.push(relationship);
765 }
766
767 pub fn simulate_expression(&mut self, steps: usize) -> Vec<Vector> {
769 let mut results = Vec::new();
770
771 for _ in 0..steps {
772 self.update_expression_levels();
773 let embedding = self.extract_expression_embedding();
774 results.push(embedding);
775 }
776
777 results
778 }
779
780 fn update_expression_levels(&mut self) {
781 let mut new_levels = self.expression_levels.clone();
782
783 for gene in &self.genes {
784 let current_level = self.expression_levels[&gene.id];
785 let regulation_effect = self.calculate_regulation_effect(&gene.id);
786
787 let new_level =
788 (current_level + regulation_effect * self.regulation_strength).clamp(0.0, 10.0); new_levels.insert(gene.id, new_level);
791 }
792
793 self.expression_levels = new_levels;
794 }
795
796 fn calculate_regulation_effect(&self, target_gene_id: &Uuid) -> f64 {
797 let mut total_effect = 0.0;
798
799 for relationship in &self.regulatory_relationships {
800 if relationship.target_gene_id == *target_gene_id {
801 let regulator_level = self.expression_levels[&relationship.regulator_gene_id];
802
803 let effect = match relationship.regulation_type {
804 RegulationType::Activation => regulator_level * relationship.strength,
805 RegulationType::Repression => -regulator_level * relationship.strength,
806 RegulationType::Dual => {
807 if regulator_level > 5.0 {
808 -regulator_level * relationship.strength
809 } else {
810 regulator_level * relationship.strength
811 }
812 }
813 };
814
815 total_effect += effect;
816 }
817 }
818
819 total_effect
820 }
821
822 fn extract_expression_embedding(&self) -> Vector {
823 let values: Vec<f32> = self
824 .genes
825 .iter()
826 .map(|gene| self.expression_levels[&gene.id] as f32)
827 .collect();
828 Vector::new(values)
829 }
830}
831
/// A gene participating in a `GeneRegulatoryNetwork`.
#[derive(Debug, Clone)]
pub struct Gene {
    /// Identifier used as the key into the network's expression levels.
    pub id: Uuid,
    /// Human-readable name.
    pub name: String,
    /// DNA sequence associated with the gene.
    pub sequence: DNASequence,
    /// Expression level the gene starts from when added to a network.
    pub basal_expression: f64,
    /// Maximum expression; `Gene::new` sets it to 10.0.
    // NOTE(review): the visible update code clamps to a literal 10.0 and does not read this field.
    pub max_expression: f64,
    /// Half-life; `Gene::new` sets it to 120.0.
    // NOTE(review): unit not stated in the visible code, and the field is not read there.
    pub half_life: f64,
}
842
843impl Gene {
844 pub fn new(name: String, sequence: DNASequence) -> Self {
845 Self {
846 id: Uuid::new_v4(),
847 name,
848 sequence,
849 basal_expression: 1.0,
850 max_expression: 10.0,
851 half_life: 120.0, }
853 }
854}
855
/// A directed regulatory edge between two genes.
#[derive(Debug, Clone)]
pub struct RegulatoryRelationship {
    /// Id of the gene whose expression level drives the effect.
    pub regulator_gene_id: Uuid,
    /// Id of the gene that receives the effect.
    pub target_gene_id: Uuid,
    /// Whether the regulator activates, represses, or does both.
    pub regulation_type: RegulationType,
    /// Multiplier applied to the regulator's expression level.
    pub strength: f64,
    /// Binding affinity.
    // NOTE(review): not read by any code visible in this part of the file.
    pub binding_affinity: f64,
}
865
/// Direction of a regulatory effect, as interpreted by
/// `GeneRegulatoryNetwork::calculate_regulation_effect`.
#[derive(Debug, Clone)]
pub enum RegulationType {
    /// Adds `regulator level × strength` to the target.
    Activation,
    /// Subtracts `regulator level × strength` from the target.
    Repression,
    /// Activates while the regulator's level is at most 5.0 and represses above it.
    Dual,
}
872
/// Simulation of molecules combining into larger structures.
#[derive(Debug, Clone)]
pub struct MolecularAssembly {
    /// Free molecules; each contributes one embedding component.
    pub molecules: Vec<Molecule>,
    /// Rules attempted once per assembly step.
    pub assembly_rules: Vec<AssemblyRule>,
    /// Temperature used in binding and disassembly probabilities.
    pub temperature: f64,
    /// Assembly mode supplied at construction.
    // NOTE(review): not read by any method visible in this part of the file.
    pub assembly_type: MolecularAssemblyType,
    /// Structures formed so far; each contributes one further embedding component.
    pub assembled_structures: Vec<AssembledStructure>,
}
882
883impl MolecularAssembly {
884 pub fn new(assembly_type: MolecularAssemblyType, temperature: f64) -> Self {
885 Self {
886 molecules: Vec::new(),
887 assembly_rules: Vec::new(),
888 temperature,
889 assembly_type,
890 assembled_structures: Vec::new(),
891 }
892 }
893
894 pub fn add_molecule(&mut self, molecule: Molecule) {
896 self.molecules.push(molecule);
897 }
898
899 pub fn add_assembly_rule(&mut self, rule: AssemblyRule) {
901 self.assembly_rules.push(rule);
902 }
903
904 pub fn simulate_assembly(&mut self, steps: usize) -> Vec<Vector> {
906 let mut results = Vec::new();
907
908 for _ in 0..steps {
909 self.perform_assembly_step();
910 let embedding = self.extract_assembly_embedding();
911 results.push(embedding);
912 }
913
914 results
915 }
916
917 fn perform_assembly_step(&mut self) {
918 let mut random = Random::default();
919
920 let assembly_rules = self.assembly_rules.clone();
922 for rule in &assembly_rules {
923 let binding_probability = self.calculate_binding_probability(rule);
924
925 if random.random::<f64>() < binding_probability {
926 self.execute_assembly_rule(rule);
927 }
928 }
929
930 self.apply_thermal_effects();
932 }
933
934 fn calculate_binding_probability(&self, rule: &AssemblyRule) -> f64 {
935 let thermal_factor = (-rule.binding_energy / (8.314 * self.temperature)).exp();
936 rule.base_probability * thermal_factor
937 }
938
939 fn execute_assembly_rule(&mut self, rule: &AssemblyRule) {
940 let compatible_molecules: Vec<_> = self
942 .molecules
943 .iter()
944 .filter(|m| rule.compatible_types.contains(&m.molecule_type))
945 .cloned()
946 .collect();
947
948 if compatible_molecules.len() >= 2 {
949 let structure = AssembledStructure {
951 id: Uuid::new_v4(),
952 component_molecules: compatible_molecules.iter().map(|m| m.id).collect(),
953 stability: rule.binding_energy / self.temperature,
954 embedding_contribution: compatible_molecules
955 .iter()
956 .map(|m| m.embedding_contribution)
957 .sum::<f64>()
958 / compatible_molecules.len() as f64,
959 };
960
961 self.assembled_structures.push(structure);
962 }
963 }
964
965 fn apply_thermal_effects(&mut self) {
966 let mut random = Random::default();
967
968 for molecule in &mut self.molecules {
970 molecule.position[0] += random.gen_range(-0.1..0.1);
971 molecule.position[1] += random.gen_range(-0.1..0.1);
972 molecule.position[2] += random.gen_range(-0.1..0.1);
973
974 let thermal_noise = random.gen_range(-0.01..0.01);
976 molecule.embedding_contribution += thermal_noise;
977 }
978
979 self.assembled_structures.retain(|structure| {
981 let disassembly_probability = (self.temperature / 1000.0) / structure.stability;
982 random.random::<f64>() >= disassembly_probability
983 });
984 }
985
986 fn extract_assembly_embedding(&self) -> Vector {
987 let mut values = Vec::new();
988
989 for molecule in &self.molecules {
991 values.push(molecule.embedding_contribution as f32);
992 }
993
994 for structure in &self.assembled_structures {
996 values.push(structure.embedding_contribution as f32);
997 }
998
999 Vector::new(values)
1000 }
1001}
1002
/// A free molecule in a `MolecularAssembly`.
#[derive(Debug, Clone)]
pub struct Molecule {
    /// Identifier recorded in the structures this molecule becomes part of.
    pub id: Uuid,
    /// Kind of molecule; matched against `AssemblyRule::compatible_types`.
    pub molecule_type: MoleculeType,
    /// Position as three coordinates; jittered by thermal effects.
    pub position: [f64; 3],
    /// Orientation; `Molecule::new` sets it to all zeros.
    pub orientation: [f64; 3],
    /// Value this molecule contributes to the assembly embedding.
    pub embedding_contribution: f64,
    /// Binding sites; `Molecule::new` leaves the list empty.
    pub binding_sites: Vec<BindingSite>,
}
1013
1014impl Molecule {
1015 pub fn new(molecule_type: MoleculeType, position: [f64; 3]) -> Self {
1016 Self {
1017 id: Uuid::new_v4(),
1018 molecule_type,
1019 position,
1020 orientation: [0.0, 0.0, 0.0],
1021 embedding_contribution: {
1022 let mut random = Random::default();
1023 random.gen_range(-1.0..1.0)
1024 },
1025 binding_sites: Vec::new(),
1026 }
1027 }
1028}
1029
/// Kind of molecule; assembly rules list the kinds they can combine.
#[derive(Debug, Clone, PartialEq)]
pub enum MoleculeType {
    Protein,
    DNA,
    RNA,
    Lipid,
    Carbohydrate,
    Metabolite,
}
1039
/// A site on a molecule at which binding can occur.
// NOTE(review): no code visible in this part of the file reads these fields.
#[derive(Debug, Clone)]
pub struct BindingSite {
    /// Free-form label for the kind of site.
    pub site_type: String,
    /// Binding affinity of the site.
    pub affinity: f64,
    /// Whether something is currently bound to the site.
    pub is_occupied: bool,
}
1047
/// A rule describing which molecule types may assemble and how readily.
#[derive(Debug, Clone)]
pub struct AssemblyRule {
    /// Identifier of this rule.
    pub id: Uuid,
    /// Molecule types the rule can combine.
    pub compatible_types: Vec<MoleculeType>,
    /// Energy used in the rule's thermal factor and in the stability of the
    /// structures it forms.
    pub binding_energy: f64,
    /// Firing probability before the thermal factor is applied.
    pub base_probability: f64,
    /// Free-form constraint labels.
    // NOTE(review): not read by any code visible in this part of the file.
    pub geometric_constraints: Vec<String>,
}
1057
/// A structure formed from several molecules by an assembly rule.
#[derive(Debug, Clone)]
pub struct AssembledStructure {
    /// Identifier of this structure.
    pub id: Uuid,
    /// Ids of the molecules that make up the structure.
    pub component_molecules: Vec<Uuid>,
    /// `binding_energy / temperature` at formation; a higher value lowers the
    /// per-step disassembly probability.
    pub stability: f64,
    /// Mean embedding contribution of the component molecules at formation.
    pub embedding_contribution: f64,
}
1066
/// Embedding model that combines DNA encoding, a cellular automaton, an
/// enzymatic network, a gene regulatory network and molecular assembly.
#[derive(Debug, Clone)]
pub struct BiologicalEmbeddingModel {
    /// Unique id of this model instance.
    id: Uuid,
    /// Generic model configuration (dimensions, epochs, ...).
    config: ModelConfig,
    /// Biological-computing specific configuration.
    bio_config: BiologicalComputingConfig,
    /// Encoded DNA sequence per entity IRI.
    dna_sequences: HashMap<String, DNASequence>,
    /// Automaton used for CA-based embeddings.
    cellular_automaton: CellularAutomaton,
    /// Network used by `optimize_enzymatic_embedding`.
    enzymatic_network: EnzymaticNetwork,
    /// Network used by `evolve_gene_embedding`.
    gene_network: GeneRegulatoryNetwork,
    /// Simulator used by `assemble_embedding`.
    molecular_assembly: MolecularAssembly,
    /// Entity IRI → numeric id.
    entities: HashMap<String, usize>,
    /// Relation IRI → numeric id.
    relations: HashMap<String, usize>,
    /// Set once training has completed.
    is_trained: bool,
    /// Bookkeeping statistics reported for this model.
    stats: crate::ModelStats,
}
1083
1084impl BiologicalEmbeddingModel {
1085 pub fn new(config: ModelConfig, bio_config: BiologicalComputingConfig) -> Self {
1087 let cellular_automaton =
1088 CellularAutomaton::new(bio_config.ca_grid_size, bio_config.ca_rule.clone());
1089
1090 let enzymatic_network = EnzymaticNetwork::new(
1091 bio_config.enzyme_reaction_rate,
1092 bio_config.assembly_temperature,
1093 );
1094
1095 let gene_network = GeneRegulatoryNetwork::new(bio_config.gene_regulation_strength);
1096
1097 let molecular_assembly = MolecularAssembly::new(
1098 bio_config.assembly_type.clone(),
1099 bio_config.assembly_temperature,
1100 );
1101
1102 Self {
1103 id: Uuid::new_v4(),
1104 config: config.clone(),
1105 bio_config,
1106 dna_sequences: HashMap::new(),
1107 cellular_automaton,
1108 enzymatic_network,
1109 gene_network,
1110 molecular_assembly,
1111 entities: HashMap::new(),
1112 relations: HashMap::new(),
1113 is_trained: false,
1114 stats: crate::ModelStats {
1115 model_type: "BiologicalEmbedding".to_string(),
1116 dimensions: config.dimensions,
1117 creation_time: chrono::Utc::now(),
1118 ..Default::default()
1119 },
1120 }
1121 }
1122
1123 pub fn encode_entity_dna(&mut self, entity: &str) -> DNASequence {
1125 let mut random = Random::default();
1126
1127 let mut sequence = Vec::new();
1129 for byte in entity.bytes() {
1130 let nucleotides_per_byte = 4; for i in 0..nucleotides_per_byte {
1132 let bits = (byte >> (i * 2)) & 0b11;
1133 let nucleotide = match bits {
1134 0 => Nucleotide::A,
1135 1 => Nucleotide::T,
1136 2 => Nucleotide::G,
1137 3 => Nucleotide::C,
1138 _ => unreachable!(),
1139 };
1140 sequence.push(nucleotide);
1141 }
1142 }
1143
1144 while sequence.len() < self.bio_config.dna_sequence_length {
1146 sequence.push(Nucleotide::random(&mut random));
1147 }
1148
1149 sequence.truncate(self.bio_config.dna_sequence_length);
1150 DNASequence::new(sequence)
1151 }
1152
1153 pub fn compute_dna_embedding(&self, entity1: &str, entity2: &str) -> f64 {
1155 if let (Some(seq1), Some(seq2)) = (
1156 self.dna_sequences.get(entity1),
1157 self.dna_sequences.get(entity2),
1158 ) {
1159 seq1.hybridize(seq2)
1160 } else {
1161 0.0
1162 }
1163 }
1164
1165 pub fn generate_ca_embedding(&mut self, input_embedding: &Vector) -> Vector {
1167 self.cellular_automaton
1168 .initialize_with_embedding(input_embedding);
1169
1170 for _ in 0..self.bio_config.ca_evolution_steps {
1171 self.cellular_automaton.evolve();
1172 }
1173
1174 self.cellular_automaton.extract_embedding()
1175 }
1176
1177 pub fn optimize_enzymatic_embedding(&mut self, embedding: &Vector) -> Vector {
1179 for (i, &value) in embedding.values.iter().enumerate() {
1181 let substrate =
1182 Substrate::new(format!("substrate_{i}"), value.abs() as f64, value as f64);
1183 self.enzymatic_network.add_substrate(substrate);
1184 }
1185
1186 let results = self.enzymatic_network.simulate_reactions(50);
1188
1189 results.last().cloned().unwrap_or_else(|| embedding.clone())
1190 }
1191
1192 pub fn evolve_gene_embedding(&mut self, embedding: &Vector) -> Vector {
1194 let mut random = Random::default();
1196
1197 for (i, &value) in embedding.values.iter().enumerate() {
1198 let dna_seq = DNASequence::random(100, &mut random);
1199 let mut gene = Gene::new(format!("gene_{i}"), dna_seq);
1200 gene.basal_expression = value.abs() as f64;
1201 self.gene_network.add_gene(gene);
1202 }
1203
1204 for i in 0..embedding.values.len() {
1206 for j in 0..embedding.values.len() {
1207 if i != j && random.random::<f64>() < 0.1 {
1208 let relationship = RegulatoryRelationship {
1210 regulator_gene_id: self.gene_network.genes[i].id,
1211 target_gene_id: self.gene_network.genes[j].id,
1212 regulation_type: if random.random::<f64>() < 0.5 {
1213 RegulationType::Activation
1214 } else {
1215 RegulationType::Repression
1216 },
1217 strength: random.gen_range(0.1..1.0),
1218 binding_affinity: random.gen_range(0.5..1.0),
1219 };
1220 self.gene_network.add_regulation(relationship);
1221 }
1222 }
1223 }
1224
1225 let results = self.gene_network.simulate_expression(100);
1227
1228 results.last().cloned().unwrap_or_else(|| embedding.clone())
1229 }
1230
1231 pub fn assemble_embedding(&mut self, embedding: &Vector) -> Vector {
1233 for (i, &value) in embedding.values.iter().enumerate() {
1235 let molecule_type = match i % 6 {
1236 0 => MoleculeType::Protein,
1237 1 => MoleculeType::DNA,
1238 2 => MoleculeType::RNA,
1239 3 => MoleculeType::Lipid,
1240 4 => MoleculeType::Carbohydrate,
1241 5 => MoleculeType::Metabolite,
1242 _ => unreachable!(),
1243 };
1244
1245 let position = [value as f64, (value * 2.0) as f64, (value * 3.0) as f64];
1246
1247 let mut molecule = Molecule::new(molecule_type, position);
1248 molecule.embedding_contribution = value as f64;
1249
1250 self.molecular_assembly.add_molecule(molecule);
1251 }
1252
1253 let rule = AssemblyRule {
1255 id: Uuid::new_v4(),
1256 compatible_types: vec![MoleculeType::Protein, MoleculeType::DNA, MoleculeType::RNA],
1257 binding_energy: 50.0, base_probability: 0.1,
1259 geometric_constraints: vec!["proximity".to_string()],
1260 };
1261 self.molecular_assembly.add_assembly_rule(rule);
1262
1263 let results = self.molecular_assembly.simulate_assembly(100);
1265
1266 results.last().cloned().unwrap_or_else(|| embedding.clone())
1267 }
1268}
1269
1270#[async_trait]
1271impl EmbeddingModel for BiologicalEmbeddingModel {
1272 fn config(&self) -> &ModelConfig {
1273 &self.config
1274 }
1275
1276 fn model_id(&self) -> &Uuid {
1277 &self.id
1278 }
1279
1280 fn model_type(&self) -> &'static str {
1281 "BiologicalEmbedding"
1282 }
1283
1284 fn add_triple(&mut self, triple: crate::Triple) -> Result<()> {
1285 let subj_id = self.entities.len();
1286 let pred_id = self.relations.len();
1287 let obj_id = self.entities.len() + 1;
1288
1289 self.entities
1290 .entry(triple.subject.iri.clone())
1291 .or_insert(subj_id);
1292 self.relations
1293 .entry(triple.predicate.iri.clone())
1294 .or_insert(pred_id);
1295 self.entities
1296 .entry(triple.object.iri.clone())
1297 .or_insert(obj_id);
1298
1299 let subj_dna = self.encode_entity_dna(&triple.subject.iri);
1301 let obj_dna = self.encode_entity_dna(&triple.object.iri);
1302
1303 self.dna_sequences.insert(triple.subject.iri, subj_dna);
1304 self.dna_sequences.insert(triple.object.iri, obj_dna);
1305
1306 self.stats.num_triples += 1;
1307 self.stats.num_entities = self.entities.len();
1308 self.stats.num_relations = self.relations.len();
1309
1310 Ok(())
1311 }
1312
1313 async fn train(&mut self, epochs: Option<usize>) -> Result<crate::TrainingStats> {
1314 let max_epochs = epochs.unwrap_or(self.config.max_epochs);
1315 let mut loss_history = Vec::new();
1316 let start_time = std::time::Instant::now();
1317
1318 for epoch in 0..max_epochs {
1320 for sequence in self.dna_sequences.values_mut() {
1324 let mut random = Random::default();
1325 sequence.mutate(self.bio_config.mutation_rate, &mut random);
1326 }
1327
1328 for _ in 0..10 {
1330 self.cellular_automaton.evolve();
1331 }
1332
1333 let dummy_embedding = Vector::new(vec![0.5; 128]);
1335 let _optimized = self.optimize_enzymatic_embedding(&dummy_embedding);
1336
1337 let loss = 1.0 / (epoch as f64 + 1.0);
1339 loss_history.push(loss);
1340
1341 if loss < 0.01 {
1342 break;
1343 }
1344 }
1345
1346 self.is_trained = true;
1347 self.stats.is_trained = true;
1348 self.stats.last_training_time = Some(chrono::Utc::now());
1349
1350 let training_time = start_time.elapsed().as_secs_f64();
1351
1352 Ok(crate::TrainingStats {
1353 epochs_completed: max_epochs,
1354 final_loss: loss_history.last().copied().unwrap_or(1.0),
1355 training_time_seconds: training_time,
1356 convergence_achieved: true,
1357 loss_history,
1358 })
1359 }
1360
1361 fn get_entity_embedding(&self, entity: &str) -> Result<Vector> {
1362 if !self.is_trained {
1363 return Err(EmbeddingError::ModelNotTrained.into());
1364 }
1365
1366 if let Some(dna_seq) = self.dna_sequences.get(entity) {
1367 Ok(dna_seq.to_vector())
1368 } else {
1369 Err(EmbeddingError::EntityNotFound {
1370 entity: entity.to_string(),
1371 }
1372 .into())
1373 }
1374 }
1375
1376 fn get_relation_embedding(&self, relation: &str) -> Result<Vector> {
1377 if !self.is_trained {
1378 return Err(EmbeddingError::ModelNotTrained.into());
1379 }
1380
1381 let ca_embedding = self.cellular_automaton.extract_embedding();
1383
1384 let relation_hash = relation.bytes().map(|b| b as usize).sum::<usize>();
1386 let start_idx = relation_hash % ca_embedding.values.len().max(1);
1387 let end_idx =
1388 ((start_idx + self.config.dimensions) % ca_embedding.values.len()).max(start_idx + 1);
1389
1390 let values = if start_idx < end_idx {
1391 ca_embedding.values[start_idx..end_idx].to_vec()
1392 } else {
1393 let mut values = ca_embedding.values[start_idx..].to_vec();
1394 values.extend_from_slice(&ca_embedding.values[..end_idx]);
1395 values
1396 };
1397
1398 let mut final_values = values;
1400 final_values.resize(self.config.dimensions, 0.0);
1401
1402 Ok(Vector::new(final_values))
1403 }
1404
1405 fn score_triple(&self, subject: &str, predicate: &str, object: &str) -> Result<f64> {
1406 let s_emb = self.get_entity_embedding(subject)?;
1407 let p_emb = self.get_relation_embedding(predicate)?;
1408 let o_emb = self.get_entity_embedding(object)?;
1409
1410 let dna_score = self.compute_dna_embedding(subject, object);
1412
1413 let traditional_score = s_emb
1415 .values
1416 .iter()
1417 .zip(p_emb.values.iter())
1418 .zip(o_emb.values.iter())
1419 .map(|((&s, &p), &o)| (s * p * o) as f64)
1420 .sum::<f64>();
1421
1422 Ok((dna_score + traditional_score) / 2.0)
1423 }
1424
1425 fn predict_objects(
1426 &self,
1427 subject: &str,
1428 predicate: &str,
1429 k: usize,
1430 ) -> Result<Vec<(String, f64)>> {
1431 let mut predictions = Vec::new();
1432
1433 for entity in self.entities.keys() {
1434 if let Ok(score) = self.score_triple(subject, predicate, entity) {
1435 predictions.push((entity.clone(), score));
1436 }
1437 }
1438
1439 predictions.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
1440 predictions.truncate(k);
1441
1442 Ok(predictions)
1443 }
1444
1445 fn predict_subjects(
1446 &self,
1447 predicate: &str,
1448 object: &str,
1449 k: usize,
1450 ) -> Result<Vec<(String, f64)>> {
1451 let mut predictions = Vec::new();
1452
1453 for entity in self.entities.keys() {
1454 if let Ok(score) = self.score_triple(entity, predicate, object) {
1455 predictions.push((entity.clone(), score));
1456 }
1457 }
1458
1459 predictions.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
1460 predictions.truncate(k);
1461
1462 Ok(predictions)
1463 }
1464
1465 fn predict_relations(
1466 &self,
1467 subject: &str,
1468 object: &str,
1469 k: usize,
1470 ) -> Result<Vec<(String, f64)>> {
1471 let mut predictions = Vec::new();
1472
1473 for relation in self.relations.keys() {
1474 if let Ok(score) = self.score_triple(subject, relation, object) {
1475 predictions.push((relation.clone(), score));
1476 }
1477 }
1478
1479 predictions.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
1480 predictions.truncate(k);
1481
1482 Ok(predictions)
1483 }
1484
1485 fn get_entities(&self) -> Vec<String> {
1486 self.entities.keys().cloned().collect()
1487 }
1488
1489 fn get_relations(&self) -> Vec<String> {
1490 self.relations.keys().cloned().collect()
1491 }
1492
1493 fn get_stats(&self) -> crate::ModelStats {
1494 self.stats.clone()
1495 }
1496
1497 fn save(&self, _path: &str) -> Result<()> {
1498 Ok(())
1499 }
1500
1501 fn load(&mut self, _path: &str) -> Result<()> {
1502 Ok(())
1503 }
1504
1505 fn clear(&mut self) {
1506 self.entities.clear();
1507 self.relations.clear();
1508 self.dna_sequences.clear();
1509 self.is_trained = false;
1510 self.stats = crate::ModelStats::default();
1511 }
1512
1513 fn is_trained(&self) -> bool {
1514 self.is_trained
1515 }
1516
1517 async fn encode(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> {
1518 let mut encoded = Vec::new();
1519
1520 for text in texts {
1521 let mut temp_model = self.clone();
1523 let dna_seq = temp_model.encode_entity_dna(text);
1524 let embedding = dna_seq.to_vector();
1525
1526 let ca_embedding = temp_model.generate_ca_embedding(&embedding);
1528
1529 encoded.push(ca_embedding.values);
1530 }
1531
1532 Ok(encoded)
1533 }
1534}
1535
#[cfg(test)]
mod tests {
    use super::*;

    /// `A` and `T` complement each other; `A` maps to `0.0` and back.
    #[test]
    fn test_nucleotide_operations() {
        let adenine = Nucleotide::A;
        let thymine = Nucleotide::T;

        assert_eq!(adenine.complement(), thymine);
        assert_eq!(thymine.complement(), adenine);
        assert_eq!(adenine.to_numeric(), 0.0);
        assert_eq!(Nucleotide::from_numeric(0.0), adenine);
    }

    /// A four-base strand reports length 4, hybridizes with score `1.0`
    /// against the `TACG` strand, and converts to a four-element vector.
    #[test]
    fn test_dna_sequence() {
        use Nucleotide::{A, C, G, T};

        let strand = DNASequence::new(vec![A, T, G, C]);
        let partner = DNASequence::new(vec![T, A, C, G]);

        assert_eq!(strand.length, 4);
        assert_eq!(strand.hybridize(&partner), 1.0);

        let as_vector = strand.to_vector();
        assert_eq!(as_vector.values.len(), 4);
    }

    /// One evolution step on a 10x10 Conway grid advances the generation
    /// counter to 1 and the statistics report 100 cells.
    #[test]
    fn test_cellular_automaton() {
        let seed = Vector::new(vec![0.5; 100]);
        let mut automaton = CellularAutomaton::new((10, 10), CellularAutomataRule::Conway);

        automaton.initialize_with_embedding(&seed);
        automaton.evolve();

        assert_eq!(automaton.generation, 1);
        assert_eq!(automaton.get_statistics().total_cells, 100);
    }

    /// Simulating ten reaction steps yields ten results.
    #[test]
    fn test_enzymatic_network() {
        let mut reactions = EnzymaticNetwork::new(0.1, 300.0);

        reactions.add_enzyme(Enzyme::new("test_enzyme".to_string(), 0.8));
        reactions.add_substrate(Substrate::new("test_substrate".to_string(), 1.0, 0.5));

        let outcome = reactions.simulate_reactions(10);
        assert_eq!(outcome.len(), 10);
    }

    /// Simulating ten expression steps on a single-gene network yields ten results.
    #[test]
    fn test_gene_regulatory_network() {
        let mut rng = Random::default();
        let mut regulation = GeneRegulatoryNetwork::new(0.5);

        let coding_sequence = DNASequence::random(100, &mut rng);
        regulation.add_gene(Gene::new("test_gene".to_string(), coding_sequence));

        let outcome = regulation.simulate_expression(10);
        assert_eq!(outcome.len(), 10);
    }

    /// Simulating ten assembly steps with one protein molecule yields ten results.
    #[test]
    fn test_molecular_assembly() {
        let mut assembler = MolecularAssembly::new(MolecularAssemblyType::SelfAssembly, 300.0);

        let origin = [0.0, 0.0, 0.0];
        assembler.add_molecule(Molecule::new(MoleculeType::Protein, origin));

        let outcome = assembler.simulate_assembly(10);
        assert_eq!(outcome.len(), 10);
    }

    /// Adding one triple registers two entities, one relation and two DNA sequences.
    #[tokio::test]
    async fn test_biological_embedding_model() {
        let mut model = BiologicalEmbeddingModel::new(
            ModelConfig::default(),
            BiologicalComputingConfig::default(),
        );

        let alice = crate::NamedNode::new("http://example.org/alice").unwrap();
        let knows = crate::NamedNode::new("http://example.org/knows").unwrap();
        let bob = crate::NamedNode::new("http://example.org/bob").unwrap();

        model
            .add_triple(crate::Triple::new(alice, knows, bob))
            .unwrap();

        assert_eq!(model.get_entities().len(), 2);
        assert_eq!(model.get_relations().len(), 1);
        assert_eq!(model.dna_sequences.len(), 2);
    }

    /// With the default configuration an encoded entity has 256 bases.
    #[test]
    fn test_dna_encoding() {
        let mut model = BiologicalEmbeddingModel::new(
            ModelConfig::default(),
            BiologicalComputingConfig::default(),
        );

        let encoded = model.encode_entity_dna("test_entity");
        assert_eq!(encoded.length, 256);
    }

    /// PCR amplification of a random 50-base strand produces more than one copy.
    #[test]
    fn test_pcr_amplification() {
        let mut rng = Random::default();
        let template = DNASequence::random(50, &mut rng);

        let copies = template.pcr_amplify(5, 2.0);
        assert!(copies.len() > 1);
    }

    /// Cutting `ATGCGCTA` at `GC` yields at least one fragment.
    #[test]
    fn test_restriction_cutting() {
        use Nucleotide::{A, C, G, T};

        let strand = DNASequence::new(vec![A, T, G, C, G, C, T, A]);
        let recognition_site = vec![G, C];

        let pieces = strand.restrict(&recognition_site);

        assert!(!pieces.is_empty());
    }
}