1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
//use super::{Domain, Strand};
use crate::Design;
use std::io::Write;
//use std::path::Path;
use itertools::Itertools;
use std::collections::HashMap;

/*
Informations about the format Mrdna :

- the extension is .dat or .txt
- the file contains three lists : a list of basepair, a list of stack, a list of phosphodiester bond. When there is a direction it is from 5' to 3'
    more details at https://gitlab.engr.illinois.edu/tbgl/tools/mrdna/-/blob/master/mrdna/readers/segmentmodel_from_lists.py
- the file is loaded using data = np.loadtxt(infile) (informative garbage on the first line is allowed
- the coordinates are in data[:,:3] ( they are then multiplied by 10 but I think the unit is in nm)
- the coordinates of the stack are in data[:,4]
- the coordinates of three_prime are in data[:,5]
*/

fn save_as_np_txt<W: Write>(
    mut w: W,
    position_list: &Vec<[f64; 3]>,
    stack_list: &Vec<isize>,
    three_prime_list: &Vec<isize>,
    basepair_list: &Vec<isize>,
) -> Result<(), anyhow::Error> {
    // mimic the numpy function save_txt

    for i in 0..position_list.len() {
        write!(
            w,
            "{:.} {:.} {:.} {} {} {}\n",
            position_list[i][0],
            position_list[i][1],
            position_list[i][2],
            basepair_list[i],
            stack_list[i],
            three_prime_list[i],
        )?;
    }

    Ok(())
}

impl<StrandLabel, DomainLabel> Design<StrandLabel, DomainLabel> {
    /// Output a design in Cadnano format.
    pub fn to_mrdna<W: Write>(&self, w: W) -> Result<(), anyhow::Error> {
        // the nucleotides are labeled in increasing order following the nucleotides of each strands (in the order of the index of the strands)

        let nucl_map = self.get_nucl_map();

        let mut nucl_number = HashMap::new(); // map each nucleotide to a number

        // maps nucleotide ( as representend by nucl_map) to stack, three_prime ( in 5' to 3' direction), basepair, positions
        let mut stack: HashMap<(isize, isize, bool), (isize, isize, bool)> = HashMap::new();
        let mut three_prime: HashMap<(isize, isize, bool), (isize, isize, bool)> = HashMap::new();
        let mut basepair: HashMap<(isize, isize, bool), (isize, isize, bool)> = HashMap::new();
        let mut position_list = vec![[0., 0., 0.]; nucl_map.len()];

        /* old numerotation of the nucleotides
        for (i,( kk, strands)) in nucl_map.iter().enumerate() {
            let k = *kk;
            let (helix,n, forward ) = k;
            assert!(strands.len()<=1, "there is a conflict on a nucleotide, this format cannot handle conflicts");

            nucl_number.insert(k,i); //  register the nucleotide index number

            position_list[i] = self.space_pos(helix,n, forward); // fill the positions array

            if nucl_map.contains_key(&(helix,n, !forward )) { // if there is a nucleotide on the complementary strand
                //basepair.insert((helix,n, forward ), (helix,n, !forward )); this one will be done when looking at (helix,n, !forward )
                basepair.insert((helix,n, !forward ), (helix,n, forward ));
            }
        }
        */

        for (r_nucl, _strands) in nucl_map.iter() {
            let nucl = *r_nucl;
            let (helix, n, forward) = nucl;

            let next_nucl = (helix, if forward { n + 1 } else { n - 1 }, forward);
            if nucl_map.contains_key(&next_nucl) {
                stack.insert(nucl, next_nucl);
            }
        }

        //  syntax_Error

        let mut i_nucl_max = 0;

        for i in 0..self.strands.len() {
            let s = &self.strands[i];
            let mut v_strand = Vec::new();
            for d in &s.domains {
                for n in d.iter() {
                    let nucl = (d.helix, n, d.forward);

                    nucl_number.insert(nucl, i_nucl_max); //  register the nucleotide index number
                    position_list[i_nucl_max] = self.space_pos(d.helix, n, d.forward); // fill the positions array
                    if nucl_map.contains_key(&(d.helix, n, !d.forward)) {
                        // if there is a nucleotide on the complementary strand register the bp
                        basepair.insert(nucl, (d.helix, n, !d.forward));
                    }

                    i_nucl_max += 1;
                    v_strand.push(nucl);
                }
            } // v_strand contains all the informations of the nucleotides of the strand in the sense order
              // v_strand is in the 5' to 3' order.

            //println!("v_strand {:?}",v_strand);
            for (r_nucl_prev, r_nucl_next) in v_strand.iter().tuple_windows() {
                let nucl_prev = *r_nucl_prev;
                let nucl_next = *r_nucl_next;

                three_prime.insert(nucl_prev, nucl_next);

                let nucleotides_linked = |n1, n2| {
                    let v_d1 = &nucl_map.get(n1);
                    let v_d2 = &nucl_map.get(n2);

                    if v_d1.is_some() && v_d2.is_some() {
                        match (v_d1.unwrap().last(), v_d2.unwrap().last()) {
                            (Some(d1), Some(d2)) => d1 == d2, // since we have checked that there is only one domain per nucleotide, we can just take the last
                            _ => false,
                        }
                    } else {
                        false
                    }
                };

                let _stack_exists = {
                    let (helix1, n1, forward1) = nucl_prev;
                    let (helix2, n2, forward2) = nucl_next;

                    if helix1 == helix2 {
                        // link on the same helix, check for the same on the complementary
                        true
                    //nucleotides_linked(&(helix1,n1, !forward1 ),&(helix2,n2, !forward2 ))
                    } else {
                        let nucl_stack1 = if forward1 {
                            (helix1, n1 + 1, forward1)
                        } else {
                            (helix1, n1 - 1, forward1)
                        };
                        let nucl_stack2 = if forward2 {
                            (helix2, n2 - 1, forward2)
                        } else {
                            (helix2, n2 + 1, forward2)
                        };
                        nucleotides_linked(&nucl_stack1, &nucl_stack2)
                    }
                };

                /*if _stack_exists {
                    stack.insert(nucl_prev,nucl_next);
                }*/
            }
        }

        let to_list = |hashmap: HashMap<(isize, isize, bool), (isize, isize, bool)>| {
            // take a hashmap mapping nucleotides to nucleotides and returns a vector
            // encoding the map using the indexes of nucl_number.
            // the absence of an image is reprented by -1
            let mut v = vec![-1; nucl_map.len()];
            for (k_n, n) in hashmap.iter() {
                v[nucl_number[k_n]] = nucl_number[n] as isize;
            }
            v
        };

        assert!(
            basepair.len() > 0,
            "mrdna does not accept design only with single strands"
        );

        let stack_list = to_list(stack);
        let three_prime_list = to_list(three_prime);
        let basepair_list = to_list(basepair);

        assert!(
            {
                let len_p = position_list.len();
                stack_list.len() == len_p
                    && three_prime_list.len() == len_p
                    && basepair_list.len() == len_p
            },
            "all the len must be equal"
        );

        //println!("stack {:?} \n3' {:?} \nbp {:?}",stack_list,three_prime_list,basepair_list );
        save_as_np_txt::<W>(
            w,
            &position_list,
            &stack_list,
            &three_prime_list,
            &basepair_list,
        )
    }
}