gff_diff/
lib.rs

1extern crate bio;
2#[macro_use]
3extern crate serde_json;
4#[macro_use]
5extern crate lazy_static;
6
7use bio::io::gff;
8use ignore_result::Ignore;
9use libflate::gzip::Decoder;
10use multimap::MultiMap;
11use rayon::prelude::*;
12use regex::Regex;
13use serde_json::value::Value;
14use std::collections::HashMap;
15use std::collections::HashSet;
16use std::error::Error;
17use std::fs::File;
18use std::sync::{Arc, Mutex};
19
/// Lookup table from a feature ID (as produced by `CompareGFF::read`) to its GFF record.
type HashGFF = HashMap<String, bio::io::gff::Record>;
21
/// Direction in which two GFF data sets (or two attribute lists) are compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareMode {
    /// Walk data set 1 and look every element up in data set 2.
    Forward,
    /// Walk data set 2 and look every element up in data set 1.
    Reverse,
}
26
/// Holds up to two loaded GFF data sets together with the options that steer loading and diffing.
pub struct CompareGFF {
    /// Data set 1; `None` until something has been loaded into slot 1.
    data1: Option<HashGFF>,
    /// Data set 2; `None` until something has been loaded into slot 2.
    data2: Option<HashGFF>,
    /// If `true`, issues are collected in `issues`; otherwise they are printed to stderr.
    record_issues: bool,
    /// If `true`, a duplicate ID is made unique by appending feature type, start and end.
    flexible_ids: bool,
    /// Issues collected so far (only filled while `record_issues` is `true`).
    issues: Vec<String>,
}
34
35impl CompareGFF {
36    /// Creates a new, blank CompareGFF.
37    pub fn new() -> Self {
38        Self {
39            data1: None,
40            data2: None,
41            record_issues: false,
42            flexible_ids: true,
43            issues: vec![],
44        }
45    }
46
    /// Changes the option to record issues in the result.
    ///
    /// With `do_record == true`, issues are collected internally and `diff()` adds them to its
    /// result under `"issues"`; with `false`, each issue is printed to stderr instead.
    pub fn record_issues(&mut self, do_record: bool) {
        self.record_issues = do_record;
    }
51
    /// Changes the option to create new IDs in case of duplicates.
    ///
    /// When enabled, a duplicate ID gets `_<feature_type>_<start>_<end>` appended while reading;
    /// when disabled (or if the extended ID is taken as well) the duplicate record is skipped.
    pub fn flexible_ids(&mut self, use_flexible_ids: bool) {
        self.flexible_ids = use_flexible_ids;
    }
56
57    /// Creates a new CompareGFF with two files.
58    pub fn new_from_files<S: Into<String>>(
59        filename1: S,
60        filename2: S,
61    ) -> Result<Self, Box<dyn Error>> {
62        let mut ret = Self::new();
63        ret.load_gff(filename1, 1)?;
64        ret.load_gff(filename2, 2)?;
65        Ok(ret)
66    }
67
68    fn get_file_reader<S: Into<String>>(
69        filename: S,
70    ) -> Result<Box<dyn std::io::Read>, Box<dyn Error>> {
71        lazy_static! {
72            static ref RE_GZ: Regex =
73                Regex::new(r#"\.gz(ip){0,1}$"#).expect("new_from_files: RE_GZ does not compile");
74        }
75        let filename: String = filename.into();
76        if RE_GZ.is_match(&filename) {
77            let file_stream = File::open(filename)?;
78            let decoder = Decoder::new(file_stream)?;
79            Ok(Box::new(decoder))
80        } else {
81            let file_stream = File::open(filename)?;
82            Ok(Box::new(file_stream))
83        }
84    }
85
86    pub fn load_gff<S: Into<String>>(
87        &mut self,
88        filename: S,
89        data_set: u8,
90    ) -> Result<(), Box<dyn Error>> {
91        //let data = Some(self.read(Box::new(File::open(filename.into())?))?);
92        let reader = Self::get_file_reader(filename)?;
93        let data = Some(self.read(reader)?);
94        match data_set {
95            1 => self.data1 = data,
96            2 => self.data2 = data,
97            _ => {
98                return Err(From::from(format!(
99                    "Data set number not 1 or 2: {}",
100                    data_set
101                )))
102            }
103        }
104        Ok(())
105    }
106
107    /// Generates the diff between the two loaded files.
108    pub fn diff(&self) -> Result<Value, Box<dyn Error>> {
109        let mut result = json!( {
110            "changes" :[]
111        });
112        self.compare(CompareMode::Forward, &mut result)?;
113        self.compare(CompareMode::Reverse, &mut result)?;
114        if self.record_issues {
115            result["issues"] = json!(self.issues);
116        }
117        Ok(result)
118    }
119
120    /// Sorts a comparison JSON. Potentially slow. Used in tests.
121    pub fn sort_comparison(result: &mut Value) {
122        match result["changes"].as_array_mut() {
123            Some(changes) => {
124                changes.par_sort_by(|a, b| {
125                    let a = serde_json::to_string(a).unwrap();
126                    let b = serde_json::to_string(b).unwrap();
127                    a.partial_cmp(&b).unwrap()
128                });
129                result["changes"] = json!(changes);
130            }
131            None => {}
132        }
133    }
134
    /// Computes the Apollo-specific diff by delegating to `compare_apollo`, which treats
    /// data set 1 as the "full" GFF and data set 2 as the Apollo GFF.
    pub fn diff_apollo(&self) -> Result<Value, Box<dyn Error>> {
        self.compare_apollo()
    }
138
139    fn log_issue(&mut self, issue: String) {
140        if self.record_issues {
141            self.issues.push(issue);
142        } else {
143            eprintln!("{}", issue);
144        }
145    }
146
147    fn unique_id(
148        &mut self,
149        ids: &mut HashSet<String>,
150        id: String,
151        entry: &bio::io::gff::Record,
152    ) -> Result<String, String> {
153        if !ids.contains(&id) {
154            return Ok(id);
155        }
156        let mut id = id;
157        if self.flexible_ids {
158            let original_id = id.to_owned();
159            id += &format!(
160                "_{}_{}_{}",
161                entry.feature_type(),
162                entry.start(),
163                entry.end()
164            );
165            if !ids.contains(&id) {
166                self.log_issue(format!("Double ID {}, changed to {}", &original_id, &id));
167                return Ok(id);
168            }
169        }
170        return Err(format!("Double ID {}, not adding {:?}", &id, entry));
171    }
172
173    /// Reads a file from a Reader into a HashGFF hash table.
174    fn read(&mut self, file: Box<dyn std::io::Read>) -> Result<HashGFF, Box<dyn Error>> {
175        let mut reader = gff::Reader::new(file, gff::GffType::GFF3);
176
177        //TODO check for double IDs?
178        let mut ids: HashSet<String> = HashSet::new();
179        let ret: HashMap<String, bio::io::gff::Record> = reader
180            .records()
181            .filter_map(|element| {
182                let e = element.ok()?;
183                let id = match self.unique_id(&mut ids, e.attributes().get("ID")?.to_string(), &e) {
184                    Ok(id) => id,
185                    Err(e) => {
186                        self.log_issue(e);
187                        return None;
188                    }
189                };
190                ids.insert(id.clone());
191                Some((id, e))
192            })
193            .collect();
194        if ret.is_empty() {
195            return Err(From::from(format!("Empty file or no gff file")));
196        }
197        Ok(ret)
198    }
199
200    /// Writes the GFF data. Used to construct a new file after diff.
201    fn write(&self, file: Box<dyn std::io::Write>, data: &HashGFF) -> Result<(), Box<dyn Error>> {
202        let mut writer = gff::Writer::new(file, gff::GffType::GFF3);
203        for (_k, v) in data {
204            writer.write(v)?;
205        }
206        Ok(())
207    }
208
209    pub fn write_data1(&self, file: Box<dyn std::io::Write>) -> Result<(), Box<dyn Error>> {
210        match &self.data1 {
211            Some(data1) => self.write(file, &data1),
212            None => Err(From::from(format!("write_data1:: data1 is not set"))),
213        }
214    }
215
216    /// Compares the attributes of two GFF elements.
217    fn compare_attributes(
218        &self,
219        id: &String,
220        key: &String,
221        values: &Vec<String>,
222        attrs: &MultiMap<String, String>,
223        mode: CompareMode,
224        result: &Arc<Mutex<&mut Value>>,
225    ) {
226        // Does attrs have that key at all?
227        if !attrs.contains_key(key) {
228            values.par_iter().for_each(|value|{
229                let action = match mode {
230                    CompareMode::Forward => "remove",
231                    _ => "add",
232                };
233                let j = json!( {"action" : action , "what": "attribute" , "id" : id , "key":key.to_string() , "value" : value.to_string() } );
234                result.lock().unwrap()["changes"]
235                    .as_array_mut()
236                    .unwrap()
237                    .push(j);
238            });
239            return;
240        }
241
242        // attrs has the key, compare values
243        let values2 = attrs.get_vec(key).unwrap();
244
245        values2.par_iter().for_each(|value2|{
246            if !values.contains(&value2) {
247                let action = match mode {
248                    CompareMode::Forward => "add",
249                    _ => "remove",
250                };
251                let j = json!({ "action" : action , "what" : "attribute" , "id" : id , "key":key , "value" : value2 } );
252                result.lock().unwrap()["changes"]
253                    .as_array_mut()
254                    .unwrap()
255                    .push(j);
256            }
257        });
258
259        match mode {
260            CompareMode::Forward => {}
261            CompareMode::Reverse => {
262                values.par_iter().for_each(|value|{
263                    if !values2.contains(&value) {
264                        let j = json!({"action" : "add", "what" : "attribute" , "id" : id , "key":key , "value" : value });
265                        result.lock().unwrap()["changes"]
266                            .as_array_mut()
267                            .unwrap()
268                            .push(j);
269                    }
270                });
271            }
272        }
273    }
274
275    /// Compares the basic attributes (location, type etc.) of two GFF elements.
276    fn compare_basics(
277        &self,
278        r1: &bio::io::gff::Record,
279        r2: &bio::io::gff::Record,
280        id: &str,
281    ) -> Vec<Value> {
282        let mut changes: Vec<Value> = vec![];
283        if r1.seqname() != r2.seqname() {
284            let j = json!({ "action" : "update" , "what" : "row" , "id" : id , "key" : "seqname" , "value" : r2.seqname() });
285            changes.push(j);
286        }
287        if r1.source() != r2.source() {
288            let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "source" , "value" : r2.source() });
289            changes.push(j);
290        }
291        if r1.feature_type() != r2.feature_type() {
292            let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "feature_type" , "value" : r2.feature_type() });
293            changes.push(j);
294        }
295        if r1.start() != r2.start() {
296            let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "start" , "value" : r2.start().to_string() });
297            changes.push(j);
298        }
299        if r1.end() != r2.end() {
300            let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "end" , "value" : r2.end().to_string() });
301            changes.push(j);
302        }
303        if r1.score() != r2.score() {
304            let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "score" , "value" : r2.score() });
305            changes.push(j);
306        }
307        if r1.strand() != r2.strand() {
308            let mut strand: String;
309            strand = ".".to_string();
310            let s = r2.strand();
311            if s.is_some() {
312                strand = s.unwrap().strand_symbol().to_string();
313            }
314            let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "strand" , "value" : strand });
315            changes.push(j);
316        }
317        if r1.frame() != r2.frame() {
318            let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "frame" , "value" : r2.frame() });
319            changes.push(j);
320        }
321        changes
322    }
323
324    /// Compares the two loaded GFF files.
325    fn compare(&self, mode: CompareMode, result: &mut Value) -> Result<(), Box<dyn Error>> {
326        let (data1, data2) = match (&self.data1, &self.data2) {
327            (Some(data1), Some(data2)) => match mode {
328                CompareMode::Forward => (data1, data2),
329                CompareMode::Reverse => (data2, data1),
330            },
331            _ => return Err(From::from(format!("Both GFF sets need to be initialized"))),
332        };
333        let result = Arc::new(Mutex::new(result));
334        data1.par_iter().for_each(|(id, r1)| {
335            if data2.contains_key(id) {
336                match mode {
337                    CompareMode::Forward => {}
338                    CompareMode::Reverse => return, // Already did that with CompareMode::Forward
339                }
340                let r2 = &data2[id];
341                self.compare_basics(&r1, r2, id.as_str())
342                    .par_iter()
343                    .for_each(|change| {
344                        result.lock().unwrap()["changes"]
345                            .as_array_mut()
346                            .unwrap()
347                            .push(change.to_owned())
348                    });
349
350                let r1a = r1.attributes();
351                let r2a = r2.attributes();
352                for (key, value) in r1a {
353                    self.compare_attributes(&id, key, value, r2a, CompareMode::Forward, &result);
354                }
355
356                for (key, value) in r2a {
357                    self.compare_attributes(&id, key, value, r1a, CompareMode::Reverse, &result);
358                }
359            } else {
360                match mode {
361                    CompareMode::Forward => {
362                        let mut o = json! ({"what":"row" , "action": "remove" , "id":id });
363                        let s = serde_json::to_string(&r1).unwrap();
364                        o["removed_element"] = serde_json::from_str(&s).unwrap();
365                        result.lock().unwrap()["changes"]
366                            .as_array_mut()
367                            .unwrap()
368                            .push(o);
369                    }
370                    CompareMode::Reverse => {
371                        let mut o = json! ({"what":"row" , "action": "add" , "id":id });
372                        let s = serde_json::to_string(&r1).unwrap();
373                        o["added_element"] = serde_json::from_str(&s).unwrap();
374                        result.lock().unwrap()["changes"]
375                            .as_array_mut()
376                            .unwrap()
377                            .push(o);
378                    }
379                }
380            }
381        });
382        Ok(())
383    }
384
385    fn get_root_parent_id(
386        &self,
387        data: &HashGFF,
388        id: &String,
389        seen: Option<HashSet<String>>,
390    ) -> Option<String> {
391        let mut seen: HashSet<String> = seen.unwrap_or(HashSet::new());
392        if seen.contains(id) {
393            return None; // circular ID chain, oh no
394        }
395        seen.insert(id.to_string());
396        match data.get(id) {
397            Some(element) => match element.attributes().get("Parent") {
398                Some(parent_id) => self.get_root_parent_id(data, parent_id, Some(seen)),
399                None => Some(id.to_string()),
400            },
401            None => None,
402        }
403    }
404
405    fn infer_original_id_from_apollo(
406        &self,
407        data1: &HashGFF,
408        data2: &HashGFF,
409        apollo_element: &bio::io::gff::Record,
410        issues: &mut Vec<String>,
411    ) -> Option<String> {
412        // Try orig_id
413        match apollo_element.attributes().get("orig_id") {
414            Some(orig_id) => {
415                return match data1.get(orig_id) {
416                    Some(_) => Some(orig_id.to_string()),
417                    None => {
418                        issues.push(format!(
419                            "Original ID '{}' given in Apollo GFF is not in full dataset!",
420                            orig_id
421                        ));
422                        None
423                    }
424                }
425            }
426            None => {}
427        }
428
429        // Find Apollo parent
430        let apollo_id = apollo_element.attributes().get("ID")?;
431        let apollo_parent_id = self.get_root_parent_id(data2, apollo_id, None)?;
432        //let apollo_parent_element = data2.get(&apollo_parent_id)?;
433
434        // Find any other Apollo element with that parent and an orig_id
435        let some_apollo_parent_id = Some(apollo_parent_id.to_owned());
436        let orig_parent_id = data2
437            .iter()
438            .filter(|(id, _element)| {
439                self.get_root_parent_id(data2, id, None) == some_apollo_parent_id
440            }) // Same Apollo parent
441            .filter_map(|(_id, element)| element.attributes().get("orig_id")) // with orig_id
442            .map(|s| s.to_string())
443            .filter(|orig_id| data1.contains_key(orig_id)) // with orig_id that exists in original dataset
444            .filter_map(|orig_id| self.get_root_parent_id(data1, &orig_id, None)) // get that original root parent
445            .nth(0)?;
446
447        // Get all (sub)children on that parent in the original
448        let some_orig_parent_id = Some(orig_parent_id);
449        let all_children_orig: HashGFF = data1
450            .iter()
451            .filter(|(_id, data)| data.seqname() == apollo_element.seqname()) // Same chromosome
452            .filter(|(id, _data)| self.get_root_parent_id(data1, id, None) == some_orig_parent_id) // Same root parent
453            .map(|(id, data)| (id.to_owned(), data.to_owned()))
454            .collect();
455
456        // Try original elements with that parent, of the same type
457        let same_type: HashGFF = all_children_orig
458            .iter()
459            .filter(|(_id, data)| data.feature_type() == apollo_element.feature_type())
460            .map(|(id, data)| (id.to_owned(), data.to_owned()))
461            .collect();
462
463        // Found one element with the same type and (root) parent in the original data, using that one
464        if same_type.len() == 1 {
465            return Some(
466                same_type
467                    .iter()
468                    .map(|(id, _data)| id.to_owned())
469                    .nth(0)
470                    .unwrap(),
471            );
472        }
473
474        // TODO try location?
475
476        None
477    }
478
479    /// data1 is "full" GFF, data2 is Apollo GFF
480    fn compare_apollo(&self) -> Result<Value, Box<dyn Error>> {
481        let (data1, data2) = match (&self.data1, &self.data2) {
482            (Some(data1), Some(data2)) => (data1, data2),
483            _ => return Err(From::from(format!("Both GFF sets need to be initialized"))),
484        };
485        let mut issues: Vec<String> = self.issues.clone();
486        let mut changes: Vec<Value> = vec![];
487
488        let _re = Regex::new(r"-\d+$").unwrap();
489
490        for (_id, apollo_element) in data2 {
491            let _attrs = apollo_element.attributes();
492            let original_id = match self.infer_original_id_from_apollo(
493                data1,
494                data2,
495                &apollo_element,
496                &mut issues,
497            ) {
498                Some(id) => id,
499                None => {
500                    issues.push(format!("No original ID found for {:?}", apollo_element));
501                    continue;
502                }
503            };
504            let original_parent_id = match data1.get(&original_id) {
505                Some(e) => e.attributes().get("Parent"),
506                None => None,
507            };
508            let original_element = match data1.get(&original_id) {
509                Some(e) => e,
510                None => {
511                    issues.push(format!("No original element found for {}", &original_id));
512                    continue;
513                }
514            };
515
516            // Add/remove/change parent ID
517            match (
518                original_parent_id,
519                original_element.attributes().get("Parent"),
520            ) {
521                (Some(apollo_opid), Some(original_opid)) => {
522                    if *apollo_opid != *original_opid {
523                        let j = json!({ "action":"update" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : apollo_opid } );
524                        changes.push(j);
525                        let j = json!({ "action":"remove" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : original_opid } );
526                        changes.push(j);
527                    }
528                }
529                (Some(apollo_opid), None) => {
530                    let j = json!({ "action":"add" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : apollo_opid } );
531                    changes.push(j);
532                }
533                (None, Some(original_opid)) => {
534                    let _j = json!({ "action":"remove" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : original_opid } );
535                    //changes.push(j); // IGNORE THIS
536                }
537                (None, None) => {}
538            }
539
540            self.compare_basics(&original_element, &apollo_element, original_id.as_str())
541                .drain(..)
542                .filter(|change| {
543                    match (
544                        change["action"].as_str(),
545                        change["key"].as_str(),
546                        change["value"].as_str(),
547                    ) {
548                        (Some("update"), Some("source"), Some(".")) => false,
549                        _ => true, // Default
550                    }
551                })
552                .for_each(|change| changes.push(change));
553        }
554
555        Ok(match self.record_issues {
556            true => json!({"changes": changes, "issues": issues}),
557            false => json!({ "changes": changes }),
558        })
559    }
560
561    fn gff_from_json(j: &Value) -> Result<bio::io::gff::Record, String> {
562        let mut ret = bio::io::gff::Record::new();
563        Self::apply_diff_row_update(&json!({"key":"seqname","value":&j["seqname"]}), &mut ret)
564            .ignore();
565        Self::apply_diff_row_update(&json!({"key":"source","value":&j["source"]}), &mut ret)
566            .ignore();
567        Self::apply_diff_row_update(
568            &json!({"key":"feature_type","value":&j["feature_type"]}),
569            &mut ret,
570        )
571        .ignore();
572        Self::apply_diff_row_update(&json!({"key":"start","value":&j["start"]}), &mut ret).ignore();
573        Self::apply_diff_row_update(&json!({"key":"end","value":&j["end"]}), &mut ret).ignore();
574        Self::apply_diff_row_update(&json!({"key":"score","value":&j["score"]}), &mut ret).ignore();
575        Self::apply_diff_row_update(&json!({"key":"strand","value":&j["strand"]}), &mut ret)
576            .ignore();
577        Self::apply_diff_row_update(&json!({"key":"frame","value":&j["frame"]}), &mut ret).ignore();
578
579        // Attributes
580        match j["attributes"].as_object() {
581            Some(attributes) => {
582                attributes.iter().for_each(|(key, values)| {
583                    match values.as_array() {
584                        Some(values) => {
585                            values
586                                .iter()
587                                .filter_map(|value| value.as_str())
588                                .for_each(|value| {
589                                    Self::apply_diff_attribute_add(
590                                        &mut ret,
591                                        key.to_string(),
592                                        value.to_string(),
593                                    )
594                                    .ignore();
595                                })
596                        }
597                        None => {} // No values?!?
598                    }
599                });
600            }
601            None => {} // No attributes
602        }
603
604        Ok(ret)
605    }
606
607    fn apply_diff_row_remove(change: &Value, data: &mut HashGFF) -> Result<(), String> {
608        match change["id"].as_str() {
609            Some(id) => {
610                data.remove(id);
611            }
612            None => return Err(format!("apply_diff_row_remove: add row, but no id set")),
613        }
614        Ok(())
615    }
616
617    fn apply_diff_row_update(
618        change: &Value,
619        element: &mut bio::io::gff::Record,
620    ) -> Result<(), String> {
621        let value = match change["value"].as_str() {
622            Some(v) => v.to_string(),
623            None => match change["value"].as_i64() {
624                Some(v) => v.to_string(),
625                None => return Err(format!("apply_diff_row_update: No value in {}", &change)),
626            },
627        };
628        match change["key"].as_str() {
629            Some("seqname") => *element.seqname_mut() = value,
630            Some("source") => *element.source_mut() = value,
631            Some("feature_type") => *element.feature_type_mut() = value,
632            Some("start") => *element.start_mut() = value.parse::<u64>().unwrap(),
633            Some("end") => *element.end_mut() = value.parse::<u64>().unwrap(),
634            Some("score") => *element.score_mut() = value,
635            Some("strand") => *element.strand_mut() = value,
636            Some("frame") => *element.frame_mut() = value,
637            _ => {
638                return Err(format!(
639                    "apply_diff_row_update: Unknown/missing 'key' in {}",
640                    change
641                ))
642            }
643        }
644        Ok(())
645    }
646
647    fn apply_diff_row_add_or_update(change: &Value, data: &mut HashGFF) -> Result<(), String> {
648        match change["id"].as_str() {
649            Some(id) => {
650                let action = change["action"].as_str().unwrap_or("");
651                let element = match action {
652                    "add" => data
653                        .entry(id.to_string())
654                        .or_insert(Self::gff_from_json(&change["added_element"])?),
655                    _ => match data.get_mut(&id.to_string()) {
656                        Some(e) => e,
657                        None => return Err(format!(
658                            "apply_diff_row_add_or_update: {} ID {} does not appear in data set",
659                            action, &id
660                        )),
661                    },
662                };
663                if action == "update" {
664                    Self::apply_diff_row_update(change, element)?;
665                }
666                Ok(())
667            }
668            None => Err(format!(
669                "apply_diff_row_add_or_update: Missing 'id' in {:?}",
670                change
671            )),
672        }
673    }
674
675    fn apply_diff_rows(
676        changes: &Vec<Value>,
677        data: &mut HashGFF,
678        action: &str,
679    ) -> Result<(), Box<dyn Error>> {
680        match changes
681            .iter()
682            .filter(|change| change["what"].as_str().unwrap_or("") == "row")
683            .filter(|change| change["action"].as_str().unwrap_or("") == action)
684            .map(|change| match change["action"].as_str() {
685                Some("remove") => Self::apply_diff_row_remove(change, data),
686                Some("update") | Some("add") => Self::apply_diff_row_add_or_update(change, data),
687                Some(other) => Err(format!(
688                    "apply_diff_rows: Unknown action {} in {:?}",
689                    other, change
690                )),
691                _ => Err(format!("apply_diff_rows: No action in {:?}", change)),
692            })
693            .filter_map(|r| match r {
694                Ok(_) => None,
695                Err(e) => Some(e),
696            })
697            .nth(0)
698        {
699            Some(err) => Err(From::from(err)),
700            None => Ok(()),
701        }
702    }
703
    /// Adds `value` under attribute `key` of `gff`. Always returns `Ok`; the `Result` keeps
    /// the signature in line with `apply_diff_attribute_remove`.
    fn apply_diff_attribute_add(
        gff: &mut bio::io::gff::Record,
        key: String,
        value: String,
    ) -> Result<(), String> {
        gff.attributes_mut().insert(key, value);
        Ok(())
    }
712
713    fn apply_diff_attribute_remove(
714        gff: &mut bio::io::gff::Record,
715        key: String,
716        value: String,
717    ) -> Result<(), String> {
718        match gff.attributes_mut().get_vec_mut(&key) {
719            Some(v) => {
720                v.retain(|x| *x != value);
721                Ok(())
722            }
723            None => Err(format!(
724                "No attribute {}/{} to remove from {:?}",
725                key, value, gff
726            )),
727        }
728    }
729
730    fn apply_diff_attributes(
731        changes: &Vec<Value>,
732        data: &mut HashGFF,
733        action: &str,
734    ) -> Result<(), Box<dyn Error>> {
735        match changes
736            .iter()
737            .filter(|change| change["what"].as_str().unwrap_or("") == "attribute")
738            .filter(|change| change["action"].as_str().unwrap_or("") == action)
739            .map(|change| {
740                let id = match change["id"].as_str() {
741                    Some(id) => id.to_string(),
742                    None => return Err(format!("apply_diff_attributes: No ID given: {}", change)),
743                };
744                let element = match data.get_mut(&id) {
745                    Some(e) => e,
746                    None => {
747                        return Err(format!(
748                            "apply_diff_attributes: ID {} not found in GFF1",
749                            id
750                        ))
751                    }
752                };
753                let key = match change["key"].as_str() {
754                    Some(s) => s.to_string(),
755                    None => return Err(format!("apply_diff_attributes: No key given: {}", change)),
756                };
757                let value = match change["value"].as_str() {
758                    Some(s) => s.to_string(),
759                    None => {
760                        return Err(format!("apply_diff_attributes: No value given: {}", change))
761                    }
762                };
763                match change["action"].as_str() {
764                    Some("add") => Self::apply_diff_attribute_add(element, key, value),
765                    Some("remove") => Self::apply_diff_attribute_remove(element, key, value),
766                    Some(other) => Err(format!(
767                        "apply_diff_attributes: Unknown action {} in {:?}",
768                        other, change
769                    )),
770                    _ => Err(format!("apply_diff_attributes: No action in {:?}", change)),
771                }
772            })
773            .filter_map(|r| match r {
774                Ok(_) => None,
775                Err(e) => Some(e),
776            })
777            .nth(0)
778        {
779            Some(err) => Err(From::from(err)),
780            None => Ok(()),
781        }
782    }
783
784    /// Applies the given diff to the data loaded into the gff 1 slot.
785    pub fn apply_diff(&mut self, diff: &Value) -> Result<&HashGFF, Box<dyn Error>> {
786        let changes = match diff["changes"].as_array() {
787            Some(changes) => changes,
788            None => return Err(From::from(format!("No changes in diff"))),
789        };
790        let data = match self.data1.as_mut() {
791            Some(data) => data,
792            _ => return Err(From::from(format!("GFF set 1 needs to be initialized"))),
793        };
794        Self::apply_diff_rows(&changes, data, "remove")?;
795        Self::apply_diff_rows(&changes, data, "add")?;
796        Self::apply_diff_rows(&changes, data, "update")?;
797        Self::apply_diff_attributes(&changes, data, "remove")?;
798        Self::apply_diff_attributes(&changes, data, "add")?;
799        Self::apply_diff_attributes(&changes, data, "update")?;
800        Ok(data)
801    }
802}
803
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Round-trip check for the fixture directory `test/<dir>/`.
    ///
    /// `core.gff` is loaded into slot 1 and `cap.gff` into slot 2. Their diff, once
    /// sorted, must have the same `changes` as the sorted `expected.json`. The diff
    /// is then applied to slot 1, and a fresh diff must list no changes.
    fn compare_expected(dir: &str) {
        let fixture = |file: &str| format!("test/{}/{}", dir, file);

        let mut cg = CompareGFF::new();
        cg.record_issues(true);
        cg.load_gff(fixture("core.gff"), 1).unwrap();
        cg.load_gff(fixture("cap.gff"), 2).unwrap();

        let mut actual = cg.diff().unwrap();
        let expected_json = fs::read_to_string(fixture("expected.json")).unwrap();
        let mut expected: Value = serde_json::from_str(&expected_json).unwrap();
        CompareGFF::sort_comparison(&mut actual);
        CompareGFF::sort_comparison(&mut expected);
        assert_eq!(actual["changes"], expected["changes"]);

        // Patch slot 1 with the diff; diffing again must then report nothing.
        cg.apply_diff(&actual).unwrap();
        let remaining = cg.diff().unwrap();
        assert_eq!(remaining["changes"].as_array().unwrap().len(), 0);
    }

    /// Calls `compare_attributes` in forward mode with the values `value1` and
    /// `value3` against an attribute map holding `value1`..`value3` under the same
    /// key, and expects exactly one recorded change: an "add" of `value2`.
    #[test]
    fn attribute_added() {
        let id = String::from("the_id");
        let key = String::from("the_key");
        let values: Vec<String> = ["value1", "value3"]
            .iter()
            .map(|v| String::from(*v))
            .collect();

        let mut attrs = MultiMap::new();
        for v in ["value1", "value2", "value3"].iter() {
            attrs.insert(String::from("the_key"), String::from(*v));
        }

        let mut changes = json!({ "changes": [] });
        let result = Arc::new(Mutex::new(&mut changes));

        let comparer = CompareGFF::new();
        comparer.compare_attributes(&id, &key, &values, &attrs, CompareMode::Forward, &result);

        let expected = json!({
            "changes": [
                {
                    "action": "add",
                    "what": "attribute",
                    "id": id,
                    "key": key,
                    "value": "value2"
                }
            ]
        });
        assert_eq!(**result.lock().unwrap(), expected);
    }

    // Emits one `#[test]` per listed name; the name is also the fixture directory
    // passed to `compare_expected`.
    macro_rules! fixture_tests {
        ($($name:ident),* $(,)*) => {
            $(
                #[test]
                fn $name() {
                    compare_expected(stringify!($name));
                }
            )*
        };
    }

    fixture_tests! {
        added_exon,
        alter_exon,
        gene_in_intron,
        gene_merge,
        gene_split,
        // Original note on this fixture: no change, actually.
        gene_validation_3exons_ok,
        gene_validation_no_start,
        gene_validation_no_stop,
        gene_validation_ok,
        iso_form,
        iso_form_lost,
        new_gene,
        // Original note on this fixture: there is actually a change here.
        no_change,
        no_change_isoforms,
        remove_exon,
        utr_change,
    }
}