use super::replication::ReplicationMachinery;
use super::types::*;
use crate::error::OxirsResult;
use crate::model::{Object, Predicate, Subject, Term, Triple};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// DNA-inspired container that stores RDF triples as a sequence of
/// [`NucleotideData`] on a primary strand, mirrored by a complementary strand.
///
/// All fields are public, so callers can modify the strands directly;
/// `validate_integrity` can be used afterwards to check that the two strands
/// still pair up.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DnaDataStructure {
/// Encoded data. `encode_triple` appends one start-codon / subject /
/// predicate / object / stop-codon run per triple.
pub primary_strand: Vec<NucleotideData>,
/// Position-by-position complement of `primary_strand`, regenerated by
/// `encode_triple`.
pub complementary_strand: Vec<NucleotideData>,
/// Named positions, written by `add_genetic_marker` and read by
/// `find_by_marker`. Positions are stored as given and are not checked
/// against the strand length.
pub genetic_markers: HashMap<String, usize>,
/// Chromosome segments. Initialized empty by `new`; not otherwise touched by
/// the methods visible in this file.
pub chromosomes: Vec<ChromosomeSegment>,
/// Replication helper. Created by `new`; not otherwise used by the methods
/// visible in this file.
pub replication_machinery: ReplicationMachinery,
}
/// One position on a strand: either an RDF term or a structural marker.
///
/// On the primary strand, `encode_triple` uses the variants as documented
/// below. On the complementary strand the same variants carry the paired
/// value instead.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum NucleotideData {
/// Carries a term; `encode_triple` stores the triple's subject here.
Adenine(Term),
/// Carries a term; `encode_triple` stores the triple's predicate here.
Thymine(Term),
/// Carries a term; `encode_triple` stores the triple's object here.
Guanine(Term),
/// Carries a [`SpecialMarker`] rather than a term (for example the start
/// and stop codons that delimit an encoded triple).
Cytosine(SpecialMarker),
}
/// Non-term markers carried by `NucleotideData::Cytosine`.
///
/// Only `StartCodon`, `StopCodon` and `Enhancer` are produced by the code in
/// this file; the meaning of the other variants' payloads is defined by their
/// users elsewhere.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SpecialMarker {
/// Opens an encoded triple on the primary strand.
StartCodon,
/// Closes an encoded triple on the primary strand.
StopCodon,
/// Promoter marker with a string payload.
Promoter(String),
/// Operator marker carrying an `AccessLevel`.
Operator(AccessLevel),
/// Enhancer marker with a string payload. Complementary-strand synthesis
/// also uses it to hold the string form of a `Guanine` term.
Enhancer(String),
/// Silencer marker with a string payload.
Silencer(String),
/// Methylation site carrying a `MethylationPattern`.
MethylationSite(MethylationPattern),
/// Histone binding site carrying a `HistoneModification`.
HistoneBinding(HistoneModification),
}
impl DnaDataStructure {
pub fn new() -> Self {
Self {
primary_strand: Vec::new(),
complementary_strand: Vec::new(),
genetic_markers: HashMap::new(),
chromosomes: Vec::new(),
replication_machinery: ReplicationMachinery::new(),
}
}
pub fn encode_triple(&mut self, triple: &Triple) -> OxirsResult<()> {
self.primary_strand
.push(NucleotideData::Cytosine(SpecialMarker::StartCodon));
self.primary_strand
.push(NucleotideData::Adenine(triple.subject().clone().into()));
self.primary_strand
.push(NucleotideData::Thymine(triple.predicate().clone().into()));
self.primary_strand
.push(NucleotideData::Guanine(triple.object().clone().into()));
self.primary_strand
.push(NucleotideData::Cytosine(SpecialMarker::StopCodon));
self.synthesize_complementary_strand()?;
Ok(())
}
fn synthesize_complementary_strand(&mut self) -> OxirsResult<()> {
self.complementary_strand.clear();
for nucleotide in &self.primary_strand {
let complement = match nucleotide {
NucleotideData::Adenine(term) => NucleotideData::Thymine(term.clone()),
NucleotideData::Thymine(term) => NucleotideData::Adenine(term.clone()),
NucleotideData::Guanine(term) => {
NucleotideData::Cytosine(SpecialMarker::Enhancer(term.to_string()))
}
NucleotideData::Cytosine(marker) => NucleotideData::Guanine(Term::NamedNode(
crate::model::NamedNode::new(format!("marker:{}", marker.type_name()))
.expect("marker IRI is valid"),
)),
};
self.complementary_strand.push(complement);
}
Ok(())
}
pub fn decode_triples(&self) -> OxirsResult<Vec<Triple>> {
let mut triples = Vec::new();
let mut current_triple_data: Vec<Term> = Vec::new();
let mut in_gene = false;
for nucleotide in &self.primary_strand {
match nucleotide {
NucleotideData::Cytosine(SpecialMarker::StartCodon) => {
in_gene = true;
current_triple_data.clear();
}
NucleotideData::Cytosine(SpecialMarker::StopCodon) => {
if in_gene && current_triple_data.len() == 3 {
if let (Some(subject), Some(predicate), Some(object)) = (
current_triple_data.first(),
current_triple_data.get(1),
current_triple_data.get(2),
) {
if let (Ok(s), Ok(p)) = (
Subject::try_from(subject.clone()),
Predicate::try_from(predicate.clone()),
) {
let o: Object = object.clone().into();
triples.push(Triple::new(s, p, o));
}
}
}
in_gene = false;
current_triple_data.clear();
}
NucleotideData::Adenine(term)
| NucleotideData::Thymine(term)
| NucleotideData::Guanine(term) => {
if in_gene {
current_triple_data.push(term.clone());
}
}
_ => {
}
}
}
Ok(triples)
}
pub fn add_genetic_marker(&mut self, name: String, position: usize) {
self.genetic_markers.insert(name, position);
}
pub fn find_by_marker(&self, marker: &str) -> Option<usize> {
self.genetic_markers.get(marker).copied()
}
pub fn validate_integrity(&self) -> bool {
if self.primary_strand.len() != self.complementary_strand.len() {
return false;
}
for (primary, complement) in self
.primary_strand
.iter()
.zip(self.complementary_strand.iter())
{
if !self.is_valid_base_pair(primary, complement) {
return false;
}
}
true
}
fn is_valid_base_pair(&self, primary: &NucleotideData, complement: &NucleotideData) -> bool {
matches!(
(primary, complement),
(NucleotideData::Adenine(_), NucleotideData::Thymine(_))
| (NucleotideData::Thymine(_), NucleotideData::Adenine(_))
| (NucleotideData::Guanine(_), NucleotideData::Cytosine(_))
| (NucleotideData::Cytosine(_), NucleotideData::Guanine(_))
)
}
pub fn length(&self) -> usize {
self.primary_strand.len()
}
pub fn memory_usage(&self) -> usize {
std::mem::size_of::<Self>()
+ self.primary_strand.capacity() * std::mem::size_of::<NucleotideData>()
+ self.complementary_strand.capacity() * std::mem::size_of::<NucleotideData>()
+ self.genetic_markers.capacity()
* (std::mem::size_of::<String>() + std::mem::size_of::<usize>())
}
}
impl SpecialMarker {
pub fn type_name(&self) -> &'static str {
match self {
SpecialMarker::StartCodon => "start_codon",
SpecialMarker::StopCodon => "stop_codon",
SpecialMarker::Promoter(_) => "promoter",
SpecialMarker::Operator(_) => "operator",
SpecialMarker::Enhancer(_) => "enhancer",
SpecialMarker::Silencer(_) => "silencer",
SpecialMarker::MethylationSite(_) => "methylation_site",
SpecialMarker::HistoneBinding(_) => "histone_binding",
}
}
}
impl Default for DnaDataStructure {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::NamedNode;

    /// Builds a triple whose subject, predicate and object are all named
    /// nodes with the given IRIs.
    fn named_triple(subject: &str, predicate: &str, object: &str) -> Triple {
        Triple::new(
            NamedNode::new(subject).expect("valid IRI"),
            NamedNode::new(predicate).expect("valid IRI"),
            NamedNode::new(object).expect("valid IRI"),
        )
    }

    /// A new structure is empty and trivially consistent.
    #[test]
    fn test_dna_structure_creation() {
        let empty = DnaDataStructure::new();

        assert_eq!(empty.length(), 0);
        assert!(empty.validate_integrity());
    }

    /// A triple survives an encode/decode round trip unchanged.
    #[test]
    fn test_triple_encoding_decoding() {
        let triple = named_triple(
            "http://example.org/subject",
            "http://example.org/predicate",
            "http://example.org/object",
        );
        let mut dna = DnaDataStructure::new();

        dna.encode_triple(&triple)
            .expect("operation should succeed");
        let decoded = dna.decode_triples().expect("operation should succeed");

        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0], triple);
    }

    /// Markers can be looked up by name; unknown names yield `None`.
    #[test]
    fn test_genetic_markers() {
        let mut dna = DnaDataStructure::new();
        dna.add_genetic_marker("test_marker".to_string(), 42);

        assert_eq!(dna.find_by_marker("test_marker"), Some(42));
        assert_eq!(dna.find_by_marker("nonexistent"), None);
    }

    /// After encoding, the two strands pair up position by position.
    #[test]
    fn test_strand_integrity() {
        let triple = named_triple(
            "http://example.org/s",
            "http://example.org/p",
            "http://example.org/o",
        );
        let mut dna = DnaDataStructure::new();

        dna.encode_triple(&triple)
            .expect("operation should succeed");

        assert!(dna.validate_integrity());
    }
}