codenano 0.5.1

A library for editing DNA molecular designs
Documentation
//use super::{Domain, Strand};
use crate::Design;
use std::io::Write;
//use std::path::Path;
use itertools::Itertools;
use std::collections::HashMap;

/*
Informations about the format Mrdna :

- the extension is .dat or .txt
- the file contains three lists : a list of basepair, a list of stack, a list of phosphodiester bond. When there is a direction it is from 5' to 3'
    more details at https://gitlab.engr.illinois.edu/tbgl/tools/mrdna/-/blob/master/mrdna/readers/segmentmodel_from_lists.py
- the file is loaded using data = np.loadtxt(infile) (informative garbage on the first line is allowed
- the coordinates are in data[:,:3] ( they are then multiplied by 10 but I think the unit is in nm)
- the coordinates of the stack are in data[:,4]
- the coordinates of three_prime are in data[:,5]
*/

fn save_as_np_txt<W: Write>(
    mut w: W,
    position_list: &Vec<[f64; 3]>,
    stack_list: &Vec<isize>,
    three_prime_list: &Vec<isize>,
    basepair_list: &Vec<isize>,
) -> Result<(), anyhow::Error> {
    // mimic the numpy function save_txt

    for i in 0..position_list.len() {
        write!(
            w,
            "{:.} {:.} {:.} {} {} {}\n",
            position_list[i][0],
            position_list[i][1],
            position_list[i][2],
            basepair_list[i],
            stack_list[i],
            three_prime_list[i],
        )?;
    }

    Ok(())
}

impl<StrandLabel, DomainLabel> Design<StrandLabel, DomainLabel> {
    /// Output a design in Cadnano format.
    pub fn to_mrdna<W: Write>(&self, w: W) -> Result<(), anyhow::Error> {
        // the nucleotides are labeled in increasing order following the nucleotides of each strands (in the order of the index of the strands)

        let nucl_map = self.get_nucl_map();

        let mut nucl_number = HashMap::new(); // map each nucleotide to a number

        // maps nucleotide ( as representend by nucl_map) to stack, three_prime ( in 5' to 3' direction), basepair, positions
        let mut stack: HashMap<(isize, isize, bool), (isize, isize, bool)> = HashMap::new();
        let mut three_prime: HashMap<(isize, isize, bool), (isize, isize, bool)> = HashMap::new();
        let mut basepair: HashMap<(isize, isize, bool), (isize, isize, bool)> = HashMap::new();
        let mut position_list = vec![[0., 0., 0.]; nucl_map.len()];

        /* old numerotation of the nucleotides
        for (i,( kk, strands)) in nucl_map.iter().enumerate() {
            let k = *kk;
            let (helix,n, forward ) = k;
            assert!(strands.len()<=1, "there is a conflict on a nucleotide, this format cannot handle conflicts");

            nucl_number.insert(k,i); //  register the nucleotide index number

            position_list[i] = self.space_pos(helix,n, forward); // fill the positions array

            if nucl_map.contains_key(&(helix,n, !forward )) { // if there is a nucleotide on the complementary strand
                //basepair.insert((helix,n, forward ), (helix,n, !forward )); this one will be done when looking at (helix,n, !forward )
                basepair.insert((helix,n, !forward ), (helix,n, forward ));
            }
        }
        */

        for (r_nucl, _strands) in nucl_map.iter() {
            let nucl = *r_nucl;
            let (helix, n, forward) = nucl;

            let next_nucl = (helix, if forward { n + 1 } else { n - 1 }, forward);
            if nucl_map.contains_key(&next_nucl) {
                stack.insert(nucl, next_nucl);
            }
        }

        //  syntax_Error

        let mut i_nucl_max = 0;

        for i in 0..self.strands.len() {
            let s = &self.strands[i];
            let mut v_strand = Vec::new();
            for d in &s.domains {
                for n in d.iter() {
                    let nucl = (d.helix, n, d.forward);

                    nucl_number.insert(nucl, i_nucl_max); //  register the nucleotide index number
                    position_list[i_nucl_max] = self.space_pos(d.helix, n, d.forward); // fill the positions array
                    if nucl_map.contains_key(&(d.helix, n, !d.forward)) {
                        // if there is a nucleotide on the complementary strand register the bp
                        basepair.insert(nucl, (d.helix, n, !d.forward));
                    }

                    i_nucl_max += 1;
                    v_strand.push(nucl);
                }
            } // v_strand contains all the informations of the nucleotides of the strand in the sense order
              // v_strand is in the 5' to 3' order.

            //println!("v_strand {:?}",v_strand);
            for (r_nucl_prev, r_nucl_next) in v_strand.iter().tuple_windows() {
                let nucl_prev = *r_nucl_prev;
                let nucl_next = *r_nucl_next;

                three_prime.insert(nucl_prev, nucl_next);

                let nucleotides_linked = |n1, n2| {
                    let v_d1 = &nucl_map.get(n1);
                    let v_d2 = &nucl_map.get(n2);

                    if v_d1.is_some() && v_d2.is_some() {
                        match (v_d1.unwrap().last(), v_d2.unwrap().last()) {
                            (Some(d1), Some(d2)) => d1 == d2, // since we have checked that there is only one domain per nucleotide, we can just take the last
                            _ => false,
                        }
                    } else {
                        false
                    }
                };

                let _stack_exists = {
                    let (helix1, n1, forward1) = nucl_prev;
                    let (helix2, n2, forward2) = nucl_next;

                    if helix1 == helix2 {
                        // link on the same helix, check for the same on the complementary
                        true
                    //nucleotides_linked(&(helix1,n1, !forward1 ),&(helix2,n2, !forward2 ))
                    } else {
                        let nucl_stack1 = if forward1 {
                            (helix1, n1 + 1, forward1)
                        } else {
                            (helix1, n1 - 1, forward1)
                        };
                        let nucl_stack2 = if forward2 {
                            (helix2, n2 - 1, forward2)
                        } else {
                            (helix2, n2 + 1, forward2)
                        };
                        nucleotides_linked(&nucl_stack1, &nucl_stack2)
                    }
                };

                /*if _stack_exists {
                    stack.insert(nucl_prev,nucl_next);
                }*/
            }
        }

        let to_list = |hashmap: HashMap<(isize, isize, bool), (isize, isize, bool)>| {
            // take a hashmap mapping nucleotides to nucleotides and returns a vector
            // encoding the map using the indexes of nucl_number.
            // the absence of an image is reprented by -1
            let mut v = vec![-1; nucl_map.len()];
            for (k_n, n) in hashmap.iter() {
                v[nucl_number[k_n]] = nucl_number[n] as isize;
            }
            v
        };

        assert!(
            basepair.len() > 0,
            "mrdna does not accept design only with single strands"
        );

        let stack_list = to_list(stack);
        let three_prime_list = to_list(three_prime);
        let basepair_list = to_list(basepair);

        assert!(
            {
                let len_p = position_list.len();
                stack_list.len() == len_p
                    && three_prime_list.len() == len_p
                    && basepair_list.len() == len_p
            },
            "all the len must be equal"
        );

        //println!("stack {:?} \n3' {:?} \nbp {:?}",stack_list,three_prime_list,basepair_list );
        save_as_np_txt::<W>(
            w,
            &position_list,
            &stack_list,
            &three_prime_list,
            &basepair_list,
        )
    }
}