// oxirs_core/molecular/dna_structures.rs

//! DNA-inspired data structures for RDF storage
2
3use super::replication::ReplicationMachinery;
4use super::types::*;
5use crate::error::OxirsResult;
6use crate::model::{Object, Predicate, Subject, Term, Triple};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// DNA-inspired data structure for RDF storage
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct DnaDataStructure {
13    /// Primary DNA strand (main data)
14    pub primary_strand: Vec<NucleotideData>,
15    /// Complementary strand (redundancy/validation)
16    pub complementary_strand: Vec<NucleotideData>,
17    /// Genetic markers for indexing
18    pub genetic_markers: HashMap<String, usize>,
19    /// Chromosome segments for partitioning
20    pub chromosomes: Vec<ChromosomeSegment>,
21    /// Replication machinery
22    pub replication_machinery: ReplicationMachinery,
23}
24
25/// Nucleotide representation for RDF data
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27pub enum NucleotideData {
28    /// Adenine - represents subjects
29    Adenine(Term),
30    /// Thymine - represents predicates
31    Thymine(Term),
32    /// Guanine - represents objects
33    Guanine(Term),
34    /// Cytosine - represents special markers
35    Cytosine(SpecialMarker),
36}
37
38/// Special markers for DNA structure
39#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
40pub enum SpecialMarker {
41    /// Start of gene (triple boundary)
42    StartCodon,
43    /// End of gene (triple boundary)
44    StopCodon,
45    /// Promoter region (index marker)
46    Promoter(String),
47    /// Operator (access control)
48    Operator(AccessLevel),
49    /// Enhancer (performance boost)
50    Enhancer(String),
51    /// Silencer (access restriction)
52    Silencer(String),
53    /// Methylation site (caching marker)
54    MethylationSite(MethylationPattern),
55    /// Histone binding site (compression marker)
56    HistoneBinding(HistoneModification),
57}
58
59impl DnaDataStructure {
60    /// Create a new DNA-inspired data structure
61    pub fn new() -> Self {
62        Self {
63            primary_strand: Vec::new(),
64            complementary_strand: Vec::new(),
65            genetic_markers: HashMap::new(),
66            chromosomes: Vec::new(),
67            replication_machinery: ReplicationMachinery::new(),
68        }
69    }
70
71    /// Encode a triple into nucleotide sequence
72    pub fn encode_triple(&mut self, triple: &Triple) -> OxirsResult<()> {
73        // Add start codon
74        self.primary_strand
75            .push(NucleotideData::Cytosine(SpecialMarker::StartCodon));
76
77        // Encode subject as Adenine
78        self.primary_strand
79            .push(NucleotideData::Adenine(triple.subject().clone().into()));
80
81        // Encode predicate as Thymine
82        self.primary_strand
83            .push(NucleotideData::Thymine(triple.predicate().clone().into()));
84
85        // Encode object as Guanine
86        self.primary_strand
87            .push(NucleotideData::Guanine(triple.object().clone().into()));
88
89        // Add stop codon
90        self.primary_strand
91            .push(NucleotideData::Cytosine(SpecialMarker::StopCodon));
92
93        // Generate complementary strand
94        self.synthesize_complementary_strand()?;
95
96        Ok(())
97    }
98
99    /// Synthesize complementary strand for validation
100    fn synthesize_complementary_strand(&mut self) -> OxirsResult<()> {
101        self.complementary_strand.clear();
102
103        for nucleotide in &self.primary_strand {
104            let complement = match nucleotide {
105                NucleotideData::Adenine(term) => NucleotideData::Thymine(term.clone()),
106                NucleotideData::Thymine(term) => NucleotideData::Adenine(term.clone()),
107                NucleotideData::Guanine(term) => {
108                    NucleotideData::Cytosine(SpecialMarker::Enhancer(term.to_string()))
109                }
110                NucleotideData::Cytosine(marker) => NucleotideData::Guanine(Term::NamedNode(
111                    crate::model::NamedNode::new(format!("marker:{}", marker.type_name()))
112                        .expect("marker IRI is valid"),
113                )),
114            };
115            self.complementary_strand.push(complement);
116        }
117
118        Ok(())
119    }
120
121    /// Decode nucleotide sequence back to triples
122    pub fn decode_triples(&self) -> OxirsResult<Vec<Triple>> {
123        let mut triples = Vec::new();
124        let mut current_triple_data: Vec<Term> = Vec::new();
125        let mut in_gene = false;
126
127        for nucleotide in &self.primary_strand {
128            match nucleotide {
129                NucleotideData::Cytosine(SpecialMarker::StartCodon) => {
130                    in_gene = true;
131                    current_triple_data.clear();
132                }
133                NucleotideData::Cytosine(SpecialMarker::StopCodon) => {
134                    if in_gene && current_triple_data.len() == 3 {
135                        if let (Some(subject), Some(predicate), Some(object)) = (
136                            current_triple_data.first(),
137                            current_triple_data.get(1),
138                            current_triple_data.get(2),
139                        ) {
140                            if let (Ok(s), Ok(p)) = (
141                                Subject::try_from(subject.clone()),
142                                Predicate::try_from(predicate.clone()),
143                            ) {
144                                let o: Object = object.clone().into();
145                                triples.push(Triple::new(s, p, o));
146                            }
147                        }
148                    }
149                    in_gene = false;
150                    current_triple_data.clear();
151                }
152                NucleotideData::Adenine(term)
153                | NucleotideData::Thymine(term)
154                | NucleotideData::Guanine(term)
155                    if in_gene =>
156                {
157                    current_triple_data.push(term.clone());
158                }
159                _ => {
160                    // Skip other special markers or non-gene nucleotides during decoding
161                }
162            }
163        }
164
165        Ok(triples)
166    }
167
168    /// Add genetic marker for indexing
169    pub fn add_genetic_marker(&mut self, name: String, position: usize) {
170        self.genetic_markers.insert(name, position);
171    }
172
173    /// Find position by genetic marker
174    pub fn find_by_marker(&self, marker: &str) -> Option<usize> {
175        self.genetic_markers.get(marker).copied()
176    }
177
178    /// Validate strand integrity
179    pub fn validate_integrity(&self) -> bool {
180        if self.primary_strand.len() != self.complementary_strand.len() {
181            return false;
182        }
183
184        // Check complementary base pairing rules
185        for (primary, complement) in self
186            .primary_strand
187            .iter()
188            .zip(self.complementary_strand.iter())
189        {
190            if !self.is_valid_base_pair(primary, complement) {
191                return false;
192            }
193        }
194
195        true
196    }
197
198    /// Check if two nucleotides form a valid base pair
199    fn is_valid_base_pair(&self, primary: &NucleotideData, complement: &NucleotideData) -> bool {
200        matches!(
201            (primary, complement),
202            (NucleotideData::Adenine(_), NucleotideData::Thymine(_))
203                | (NucleotideData::Thymine(_), NucleotideData::Adenine(_))
204                | (NucleotideData::Guanine(_), NucleotideData::Cytosine(_))
205                | (NucleotideData::Cytosine(_), NucleotideData::Guanine(_))
206        )
207    }
208
209    /// Get strand length
210    pub fn length(&self) -> usize {
211        self.primary_strand.len()
212    }
213
214    /// Get memory usage estimate
215    pub fn memory_usage(&self) -> usize {
216        std::mem::size_of::<Self>()
217            + self.primary_strand.capacity() * std::mem::size_of::<NucleotideData>()
218            + self.complementary_strand.capacity() * std::mem::size_of::<NucleotideData>()
219            + self.genetic_markers.capacity()
220                * (std::mem::size_of::<String>() + std::mem::size_of::<usize>())
221    }
222}
223
224impl SpecialMarker {
225    /// Get the type name of the marker
226    pub fn type_name(&self) -> &'static str {
227        match self {
228            SpecialMarker::StartCodon => "start_codon",
229            SpecialMarker::StopCodon => "stop_codon",
230            SpecialMarker::Promoter(_) => "promoter",
231            SpecialMarker::Operator(_) => "operator",
232            SpecialMarker::Enhancer(_) => "enhancer",
233            SpecialMarker::Silencer(_) => "silencer",
234            SpecialMarker::MethylationSite(_) => "methylation_site",
235            SpecialMarker::HistoneBinding(_) => "histone_binding",
236        }
237    }
238}
239
240impl Default for DnaDataStructure {
241    fn default() -> Self {
242        Self::new()
243    }
244}
245
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::NamedNode;

    /// Builds a triple whose subject, predicate and object are the IRIs
    /// `http://example.org/<suffix>` for the three given suffixes.
    fn example_triple(subject: &str, predicate: &str, object: &str) -> Triple {
        let node = |suffix: &str| {
            NamedNode::new(format!("http://example.org/{}", suffix)).expect("valid IRI")
        };
        Triple::new(node(subject), node(predicate), node(object))
    }

    #[test]
    fn test_dna_structure_creation() {
        let dna = DnaDataStructure::new();
        assert_eq!(dna.length(), 0);
        // Two empty strands pair up trivially.
        assert!(dna.validate_integrity());
    }

    #[test]
    fn test_triple_encoding_decoding() {
        let mut dna = DnaDataStructure::new();
        let triple = example_triple("subject", "predicate", "object");

        dna.encode_triple(&triple)
            .expect("operation should succeed");
        let decoded = dna.decode_triples().expect("operation should succeed");

        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0], triple);
    }

    #[test]
    fn test_multiple_triples_round_trip() {
        let mut dna = DnaDataStructure::new();
        let first = example_triple("s1", "p1", "o1");
        let second = example_triple("s2", "p2", "o2");

        dna.encode_triple(&first)
            .expect("operation should succeed");
        dna.encode_triple(&second)
            .expect("operation should succeed");

        // Each triple occupies five nucleotides: start, S, P, O, stop.
        assert_eq!(dna.length(), 10);
        assert_eq!(dna.complementary_strand.len(), dna.primary_strand.len());
        assert!(dna.validate_integrity());

        let decoded = dna.decode_triples().expect("operation should succeed");
        assert_eq!(decoded.len(), 2);
        assert_eq!(decoded[0], first);
        assert_eq!(decoded[1], second);
    }

    #[test]
    fn test_genetic_markers() {
        let mut dna = DnaDataStructure::new();

        dna.add_genetic_marker("test_marker".to_string(), 42);
        assert_eq!(dna.find_by_marker("test_marker"), Some(42));
        assert_eq!(dna.find_by_marker("nonexistent"), None);
    }

    #[test]
    fn test_strand_integrity() {
        let mut dna = DnaDataStructure::new();
        let triple = example_triple("s", "p", "o");

        dna.encode_triple(&triple)
            .expect("operation should succeed");
        assert!(dna.validate_integrity());
    }
}