oxirs-core 0.2.2

Core RDF and SPARQL functionality for OxiRS - native Rust implementation with zero dependencies
Documentation
//! DNA-inspired data structures for RDF storage

use super::replication::ReplicationMachinery;
use super::types::*;
use crate::error::OxirsResult;
use crate::model::{Object, Predicate, Subject, Term, Triple};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// DNA-inspired data structure for RDF storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DnaDataStructure {
    /// Primary DNA strand (main data)
    pub primary_strand: Vec<NucleotideData>,
    /// Complementary strand (redundancy/validation)
    pub complementary_strand: Vec<NucleotideData>,
    /// Genetic markers for indexing
    pub genetic_markers: HashMap<String, usize>,
    /// Chromosome segments for partitioning
    pub chromosomes: Vec<ChromosomeSegment>,
    /// Replication machinery
    pub replication_machinery: ReplicationMachinery,
}

/// Nucleotide representation for RDF data
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum NucleotideData {
    /// Adenine - represents subjects
    Adenine(Term),
    /// Thymine - represents predicates
    Thymine(Term),
    /// Guanine - represents objects
    Guanine(Term),
    /// Cytosine - represents special markers
    Cytosine(SpecialMarker),
}

/// Special markers for DNA structure
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SpecialMarker {
    /// Start of gene (triple boundary)
    StartCodon,
    /// End of gene (triple boundary)
    StopCodon,
    /// Promoter region (index marker)
    Promoter(String),
    /// Operator (access control)
    Operator(AccessLevel),
    /// Enhancer (performance boost)
    Enhancer(String),
    /// Silencer (access restriction)
    Silencer(String),
    /// Methylation site (caching marker)
    MethylationSite(MethylationPattern),
    /// Histone binding site (compression marker)
    HistoneBinding(HistoneModification),
}

impl DnaDataStructure {
    /// Create a new DNA-inspired data structure
    pub fn new() -> Self {
        Self {
            primary_strand: Vec::new(),
            complementary_strand: Vec::new(),
            genetic_markers: HashMap::new(),
            chromosomes: Vec::new(),
            replication_machinery: ReplicationMachinery::new(),
        }
    }

    /// Encode a triple into nucleotide sequence
    pub fn encode_triple(&mut self, triple: &Triple) -> OxirsResult<()> {
        // Add start codon
        self.primary_strand
            .push(NucleotideData::Cytosine(SpecialMarker::StartCodon));

        // Encode subject as Adenine
        self.primary_strand
            .push(NucleotideData::Adenine(triple.subject().clone().into()));

        // Encode predicate as Thymine
        self.primary_strand
            .push(NucleotideData::Thymine(triple.predicate().clone().into()));

        // Encode object as Guanine
        self.primary_strand
            .push(NucleotideData::Guanine(triple.object().clone().into()));

        // Add stop codon
        self.primary_strand
            .push(NucleotideData::Cytosine(SpecialMarker::StopCodon));

        // Generate complementary strand
        self.synthesize_complementary_strand()?;

        Ok(())
    }

    /// Synthesize complementary strand for validation
    fn synthesize_complementary_strand(&mut self) -> OxirsResult<()> {
        self.complementary_strand.clear();

        for nucleotide in &self.primary_strand {
            let complement = match nucleotide {
                NucleotideData::Adenine(term) => NucleotideData::Thymine(term.clone()),
                NucleotideData::Thymine(term) => NucleotideData::Adenine(term.clone()),
                NucleotideData::Guanine(term) => {
                    NucleotideData::Cytosine(SpecialMarker::Enhancer(term.to_string()))
                }
                NucleotideData::Cytosine(marker) => NucleotideData::Guanine(Term::NamedNode(
                    crate::model::NamedNode::new(format!("marker:{}", marker.type_name()))
                        .expect("marker IRI is valid"),
                )),
            };
            self.complementary_strand.push(complement);
        }

        Ok(())
    }

    /// Decode nucleotide sequence back to triples
    pub fn decode_triples(&self) -> OxirsResult<Vec<Triple>> {
        let mut triples = Vec::new();
        let mut current_triple_data: Vec<Term> = Vec::new();
        let mut in_gene = false;

        for nucleotide in &self.primary_strand {
            match nucleotide {
                NucleotideData::Cytosine(SpecialMarker::StartCodon) => {
                    in_gene = true;
                    current_triple_data.clear();
                }
                NucleotideData::Cytosine(SpecialMarker::StopCodon) => {
                    if in_gene && current_triple_data.len() == 3 {
                        if let (Some(subject), Some(predicate), Some(object)) = (
                            current_triple_data.first(),
                            current_triple_data.get(1),
                            current_triple_data.get(2),
                        ) {
                            if let (Ok(s), Ok(p)) = (
                                Subject::try_from(subject.clone()),
                                Predicate::try_from(predicate.clone()),
                            ) {
                                let o: Object = object.clone().into();
                                triples.push(Triple::new(s, p, o));
                            }
                        }
                    }
                    in_gene = false;
                    current_triple_data.clear();
                }
                NucleotideData::Adenine(term)
                | NucleotideData::Thymine(term)
                | NucleotideData::Guanine(term) => {
                    if in_gene {
                        current_triple_data.push(term.clone());
                    }
                }
                _ => {
                    // Skip other special markers during decoding
                }
            }
        }

        Ok(triples)
    }

    /// Add genetic marker for indexing
    pub fn add_genetic_marker(&mut self, name: String, position: usize) {
        self.genetic_markers.insert(name, position);
    }

    /// Find position by genetic marker
    pub fn find_by_marker(&self, marker: &str) -> Option<usize> {
        self.genetic_markers.get(marker).copied()
    }

    /// Validate strand integrity
    pub fn validate_integrity(&self) -> bool {
        if self.primary_strand.len() != self.complementary_strand.len() {
            return false;
        }

        // Check complementary base pairing rules
        for (primary, complement) in self
            .primary_strand
            .iter()
            .zip(self.complementary_strand.iter())
        {
            if !self.is_valid_base_pair(primary, complement) {
                return false;
            }
        }

        true
    }

    /// Check if two nucleotides form a valid base pair
    fn is_valid_base_pair(&self, primary: &NucleotideData, complement: &NucleotideData) -> bool {
        matches!(
            (primary, complement),
            (NucleotideData::Adenine(_), NucleotideData::Thymine(_))
                | (NucleotideData::Thymine(_), NucleotideData::Adenine(_))
                | (NucleotideData::Guanine(_), NucleotideData::Cytosine(_))
                | (NucleotideData::Cytosine(_), NucleotideData::Guanine(_))
        )
    }

    /// Get strand length
    pub fn length(&self) -> usize {
        self.primary_strand.len()
    }

    /// Get memory usage estimate
    pub fn memory_usage(&self) -> usize {
        std::mem::size_of::<Self>()
            + self.primary_strand.capacity() * std::mem::size_of::<NucleotideData>()
            + self.complementary_strand.capacity() * std::mem::size_of::<NucleotideData>()
            + self.genetic_markers.capacity()
                * (std::mem::size_of::<String>() + std::mem::size_of::<usize>())
    }
}

impl SpecialMarker {
    /// Get the type name of the marker
    pub fn type_name(&self) -> &'static str {
        match self {
            SpecialMarker::StartCodon => "start_codon",
            SpecialMarker::StopCodon => "stop_codon",
            SpecialMarker::Promoter(_) => "promoter",
            SpecialMarker::Operator(_) => "operator",
            SpecialMarker::Enhancer(_) => "enhancer",
            SpecialMarker::Silencer(_) => "silencer",
            SpecialMarker::MethylationSite(_) => "methylation_site",
            SpecialMarker::HistoneBinding(_) => "histone_binding",
        }
    }
}

impl Default for DnaDataStructure {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::NamedNode;

    #[test]
    fn test_dna_structure_creation() {
        let dna = DnaDataStructure::new();
        assert_eq!(dna.length(), 0);
        assert!(dna.validate_integrity());
    }

    #[test]
    fn test_triple_encoding_decoding() {
        let mut dna = DnaDataStructure::new();

        let triple = Triple::new(
            NamedNode::new("http://example.org/subject").expect("valid IRI"),
            NamedNode::new("http://example.org/predicate").expect("valid IRI"),
            NamedNode::new("http://example.org/object").expect("valid IRI"),
        );

        dna.encode_triple(&triple)
            .expect("operation should succeed");
        let decoded = dna.decode_triples().expect("operation should succeed");

        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0], triple);
    }

    #[test]
    fn test_genetic_markers() {
        let mut dna = DnaDataStructure::new();

        dna.add_genetic_marker("test_marker".to_string(), 42);
        assert_eq!(dna.find_by_marker("test_marker"), Some(42));
        assert_eq!(dna.find_by_marker("nonexistent"), None);
    }

    #[test]
    fn test_strand_integrity() {
        let mut dna = DnaDataStructure::new();

        let triple = Triple::new(
            NamedNode::new("http://example.org/s").expect("valid IRI"),
            NamedNode::new("http://example.org/p").expect("valid IRI"),
            NamedNode::new("http://example.org/o").expect("valid IRI"),
        );

        dna.encode_triple(&triple)
            .expect("operation should succeed");
        assert!(dna.validate_integrity());
    }
}