1extern crate bio;
2#[macro_use]
3extern crate serde_json;
4#[macro_use]
5extern crate lazy_static;
6
7use bio::io::gff;
8use ignore_result::Ignore;
9use libflate::gzip::Decoder;
10use multimap::MultiMap;
11use rayon::prelude::*;
12use regex::Regex;
13use serde_json::value::Value;
14use std::collections::HashMap;
15use std::collections::HashSet;
16use std::error::Error;
17use std::fs::File;
18use std::sync::{Arc, Mutex};
19
/// GFF records keyed by ID. `CompareGFF::read` fills it using each record's
/// `ID` attribute (possibly made unique by `CompareGFF::unique_id`) as the key.
type HashGFF = HashMap<String, bio::io::gff::Record>;
21
/// Direction of one comparison pass between the two loaded GFF data sets.
///
/// The enum is a plain flag, so it derives the usual value-type traits; this
/// lets callers copy it, compare it and print it in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareMode {
    /// Data set 1 is walked and looked up in data set 2.
    Forward,
    /// Data set 2 is walked and looked up in data set 1.
    Reverse,
}
26
27pub struct CompareGFF {
28 data1: Option<HashGFF>,
29 data2: Option<HashGFF>,
30 record_issues: bool,
31 flexible_ids: bool,
32 issues: Vec<String>,
33}
34
35impl CompareGFF {
36 pub fn new() -> Self {
38 Self {
39 data1: None,
40 data2: None,
41 record_issues: false,
42 flexible_ids: true,
43 issues: vec![],
44 }
45 }
46
    /// Chooses what happens to issues: when `do_record` is `true` they are
    /// stored in the comparer (and added to the `diff` output under
    /// `"issues"`); when `false` they are printed to stderr.
    pub fn record_issues(&mut self, do_record: bool) {
        self.record_issues = do_record;
    }
51
    /// Enables or disables flexible IDs. When enabled, a record whose ID is
    /// already taken gets a replacement ID built from its feature type, start
    /// and end; when disabled, such a record is rejected.
    pub fn flexible_ids(&mut self, use_flexible_ids: bool) {
        self.flexible_ids = use_flexible_ids;
    }
56
57 pub fn new_from_files<S: Into<String>>(
59 filename1: S,
60 filename2: S,
61 ) -> Result<Self, Box<dyn Error>> {
62 let mut ret = Self::new();
63 ret.load_gff(filename1, 1)?;
64 ret.load_gff(filename2, 2)?;
65 Ok(ret)
66 }
67
68 fn get_file_reader<S: Into<String>>(
69 filename: S,
70 ) -> Result<Box<dyn std::io::Read>, Box<dyn Error>> {
71 lazy_static! {
72 static ref RE_GZ: Regex =
73 Regex::new(r#"\.gz(ip){0,1}$"#).expect("new_from_files: RE_GZ does not compile");
74 }
75 let filename: String = filename.into();
76 if RE_GZ.is_match(&filename) {
77 let file_stream = File::open(filename)?;
78 let decoder = Decoder::new(file_stream)?;
79 Ok(Box::new(decoder))
80 } else {
81 let file_stream = File::open(filename)?;
82 Ok(Box::new(file_stream))
83 }
84 }
85
86 pub fn load_gff<S: Into<String>>(
87 &mut self,
88 filename: S,
89 data_set: u8,
90 ) -> Result<(), Box<dyn Error>> {
91 let reader = Self::get_file_reader(filename)?;
93 let data = Some(self.read(reader)?);
94 match data_set {
95 1 => self.data1 = data,
96 2 => self.data2 = data,
97 _ => {
98 return Err(From::from(format!(
99 "Data set number not 1 or 2: {}",
100 data_set
101 )))
102 }
103 }
104 Ok(())
105 }
106
107 pub fn diff(&self) -> Result<Value, Box<dyn Error>> {
109 let mut result = json!( {
110 "changes" :[]
111 });
112 self.compare(CompareMode::Forward, &mut result)?;
113 self.compare(CompareMode::Reverse, &mut result)?;
114 if self.record_issues {
115 result["issues"] = json!(self.issues);
116 }
117 Ok(result)
118 }
119
120 pub fn sort_comparison(result: &mut Value) {
122 match result["changes"].as_array_mut() {
123 Some(changes) => {
124 changes.par_sort_by(|a, b| {
125 let a = serde_json::to_string(a).unwrap();
126 let b = serde_json::to_string(b).unwrap();
127 a.partial_cmp(&b).unwrap()
128 });
129 result["changes"] = json!(changes);
130 }
131 None => {}
132 }
133 }
134
    /// Public entry point for the Apollo comparison; forwards to
    /// `compare_apollo` and returns its result unchanged.
    pub fn diff_apollo(&self) -> Result<Value, Box<dyn Error>> {
        self.compare_apollo()
    }
138
139 fn log_issue(&mut self, issue: String) {
140 if self.record_issues {
141 self.issues.push(issue);
142 } else {
143 eprintln!("{}", issue);
144 }
145 }
146
147 fn unique_id(
148 &mut self,
149 ids: &mut HashSet<String>,
150 id: String,
151 entry: &bio::io::gff::Record,
152 ) -> Result<String, String> {
153 if !ids.contains(&id) {
154 return Ok(id);
155 }
156 let mut id = id;
157 if self.flexible_ids {
158 let original_id = id.to_owned();
159 id += &format!(
160 "_{}_{}_{}",
161 entry.feature_type(),
162 entry.start(),
163 entry.end()
164 );
165 if !ids.contains(&id) {
166 self.log_issue(format!("Double ID {}, changed to {}", &original_id, &id));
167 return Ok(id);
168 }
169 }
170 return Err(format!("Double ID {}, not adding {:?}", &id, entry));
171 }
172
173 fn read(&mut self, file: Box<dyn std::io::Read>) -> Result<HashGFF, Box<dyn Error>> {
175 let mut reader = gff::Reader::new(file, gff::GffType::GFF3);
176
177 let mut ids: HashSet<String> = HashSet::new();
179 let ret: HashMap<String, bio::io::gff::Record> = reader
180 .records()
181 .filter_map(|element| {
182 let e = element.ok()?;
183 let id = match self.unique_id(&mut ids, e.attributes().get("ID")?.to_string(), &e) {
184 Ok(id) => id,
185 Err(e) => {
186 self.log_issue(e);
187 return None;
188 }
189 };
190 ids.insert(id.clone());
191 Some((id, e))
192 })
193 .collect();
194 if ret.is_empty() {
195 return Err(From::from(format!("Empty file or no gff file")));
196 }
197 Ok(ret)
198 }
199
200 fn write(&self, file: Box<dyn std::io::Write>, data: &HashGFF) -> Result<(), Box<dyn Error>> {
202 let mut writer = gff::Writer::new(file, gff::GffType::GFF3);
203 for (_k, v) in data {
204 writer.write(v)?;
205 }
206 Ok(())
207 }
208
209 pub fn write_data1(&self, file: Box<dyn std::io::Write>) -> Result<(), Box<dyn Error>> {
210 match &self.data1 {
211 Some(data1) => self.write(file, &data1),
212 None => Err(From::from(format!("write_data1:: data1 is not set"))),
213 }
214 }
215
216 fn compare_attributes(
218 &self,
219 id: &String,
220 key: &String,
221 values: &Vec<String>,
222 attrs: &MultiMap<String, String>,
223 mode: CompareMode,
224 result: &Arc<Mutex<&mut Value>>,
225 ) {
226 if !attrs.contains_key(key) {
228 values.par_iter().for_each(|value|{
229 let action = match mode {
230 CompareMode::Forward => "remove",
231 _ => "add",
232 };
233 let j = json!( {"action" : action , "what": "attribute" , "id" : id , "key":key.to_string() , "value" : value.to_string() } );
234 result.lock().unwrap()["changes"]
235 .as_array_mut()
236 .unwrap()
237 .push(j);
238 });
239 return;
240 }
241
242 let values2 = attrs.get_vec(key).unwrap();
244
245 values2.par_iter().for_each(|value2|{
246 if !values.contains(&value2) {
247 let action = match mode {
248 CompareMode::Forward => "add",
249 _ => "remove",
250 };
251 let j = json!({ "action" : action , "what" : "attribute" , "id" : id , "key":key , "value" : value2 } );
252 result.lock().unwrap()["changes"]
253 .as_array_mut()
254 .unwrap()
255 .push(j);
256 }
257 });
258
259 match mode {
260 CompareMode::Forward => {}
261 CompareMode::Reverse => {
262 values.par_iter().for_each(|value|{
263 if !values2.contains(&value) {
264 let j = json!({"action" : "add", "what" : "attribute" , "id" : id , "key":key , "value" : value });
265 result.lock().unwrap()["changes"]
266 .as_array_mut()
267 .unwrap()
268 .push(j);
269 }
270 });
271 }
272 }
273 }
274
275 fn compare_basics(
277 &self,
278 r1: &bio::io::gff::Record,
279 r2: &bio::io::gff::Record,
280 id: &str,
281 ) -> Vec<Value> {
282 let mut changes: Vec<Value> = vec![];
283 if r1.seqname() != r2.seqname() {
284 let j = json!({ "action" : "update" , "what" : "row" , "id" : id , "key" : "seqname" , "value" : r2.seqname() });
285 changes.push(j);
286 }
287 if r1.source() != r2.source() {
288 let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "source" , "value" : r2.source() });
289 changes.push(j);
290 }
291 if r1.feature_type() != r2.feature_type() {
292 let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "feature_type" , "value" : r2.feature_type() });
293 changes.push(j);
294 }
295 if r1.start() != r2.start() {
296 let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "start" , "value" : r2.start().to_string() });
297 changes.push(j);
298 }
299 if r1.end() != r2.end() {
300 let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "end" , "value" : r2.end().to_string() });
301 changes.push(j);
302 }
303 if r1.score() != r2.score() {
304 let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "score" , "value" : r2.score() });
305 changes.push(j);
306 }
307 if r1.strand() != r2.strand() {
308 let mut strand: String;
309 strand = ".".to_string();
310 let s = r2.strand();
311 if s.is_some() {
312 strand = s.unwrap().strand_symbol().to_string();
313 }
314 let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "strand" , "value" : strand });
315 changes.push(j);
316 }
317 if r1.frame() != r2.frame() {
318 let j = json!( { "action" : "update" , "what" : "row" , "id" : id , "key" : "frame" , "value" : r2.frame() });
319 changes.push(j);
320 }
321 changes
322 }
323
324 fn compare(&self, mode: CompareMode, result: &mut Value) -> Result<(), Box<dyn Error>> {
326 let (data1, data2) = match (&self.data1, &self.data2) {
327 (Some(data1), Some(data2)) => match mode {
328 CompareMode::Forward => (data1, data2),
329 CompareMode::Reverse => (data2, data1),
330 },
331 _ => return Err(From::from(format!("Both GFF sets need to be initialized"))),
332 };
333 let result = Arc::new(Mutex::new(result));
334 data1.par_iter().for_each(|(id, r1)| {
335 if data2.contains_key(id) {
336 match mode {
337 CompareMode::Forward => {}
338 CompareMode::Reverse => return, }
340 let r2 = &data2[id];
341 self.compare_basics(&r1, r2, id.as_str())
342 .par_iter()
343 .for_each(|change| {
344 result.lock().unwrap()["changes"]
345 .as_array_mut()
346 .unwrap()
347 .push(change.to_owned())
348 });
349
350 let r1a = r1.attributes();
351 let r2a = r2.attributes();
352 for (key, value) in r1a {
353 self.compare_attributes(&id, key, value, r2a, CompareMode::Forward, &result);
354 }
355
356 for (key, value) in r2a {
357 self.compare_attributes(&id, key, value, r1a, CompareMode::Reverse, &result);
358 }
359 } else {
360 match mode {
361 CompareMode::Forward => {
362 let mut o = json! ({"what":"row" , "action": "remove" , "id":id });
363 let s = serde_json::to_string(&r1).unwrap();
364 o["removed_element"] = serde_json::from_str(&s).unwrap();
365 result.lock().unwrap()["changes"]
366 .as_array_mut()
367 .unwrap()
368 .push(o);
369 }
370 CompareMode::Reverse => {
371 let mut o = json! ({"what":"row" , "action": "add" , "id":id });
372 let s = serde_json::to_string(&r1).unwrap();
373 o["added_element"] = serde_json::from_str(&s).unwrap();
374 result.lock().unwrap()["changes"]
375 .as_array_mut()
376 .unwrap()
377 .push(o);
378 }
379 }
380 }
381 });
382 Ok(())
383 }
384
385 fn get_root_parent_id(
386 &self,
387 data: &HashGFF,
388 id: &String,
389 seen: Option<HashSet<String>>,
390 ) -> Option<String> {
391 let mut seen: HashSet<String> = seen.unwrap_or(HashSet::new());
392 if seen.contains(id) {
393 return None; }
395 seen.insert(id.to_string());
396 match data.get(id) {
397 Some(element) => match element.attributes().get("Parent") {
398 Some(parent_id) => self.get_root_parent_id(data, parent_id, Some(seen)),
399 None => Some(id.to_string()),
400 },
401 None => None,
402 }
403 }
404
405 fn infer_original_id_from_apollo(
406 &self,
407 data1: &HashGFF,
408 data2: &HashGFF,
409 apollo_element: &bio::io::gff::Record,
410 issues: &mut Vec<String>,
411 ) -> Option<String> {
412 match apollo_element.attributes().get("orig_id") {
414 Some(orig_id) => {
415 return match data1.get(orig_id) {
416 Some(_) => Some(orig_id.to_string()),
417 None => {
418 issues.push(format!(
419 "Original ID '{}' given in Apollo GFF is not in full dataset!",
420 orig_id
421 ));
422 None
423 }
424 }
425 }
426 None => {}
427 }
428
429 let apollo_id = apollo_element.attributes().get("ID")?;
431 let apollo_parent_id = self.get_root_parent_id(data2, apollo_id, None)?;
432 let some_apollo_parent_id = Some(apollo_parent_id.to_owned());
436 let orig_parent_id = data2
437 .iter()
438 .filter(|(id, _element)| {
439 self.get_root_parent_id(data2, id, None) == some_apollo_parent_id
440 }) .filter_map(|(_id, element)| element.attributes().get("orig_id")) .map(|s| s.to_string())
443 .filter(|orig_id| data1.contains_key(orig_id)) .filter_map(|orig_id| self.get_root_parent_id(data1, &orig_id, None)) .nth(0)?;
446
447 let some_orig_parent_id = Some(orig_parent_id);
449 let all_children_orig: HashGFF = data1
450 .iter()
451 .filter(|(_id, data)| data.seqname() == apollo_element.seqname()) .filter(|(id, _data)| self.get_root_parent_id(data1, id, None) == some_orig_parent_id) .map(|(id, data)| (id.to_owned(), data.to_owned()))
454 .collect();
455
456 let same_type: HashGFF = all_children_orig
458 .iter()
459 .filter(|(_id, data)| data.feature_type() == apollo_element.feature_type())
460 .map(|(id, data)| (id.to_owned(), data.to_owned()))
461 .collect();
462
463 if same_type.len() == 1 {
465 return Some(
466 same_type
467 .iter()
468 .map(|(id, _data)| id.to_owned())
469 .nth(0)
470 .unwrap(),
471 );
472 }
473
474 None
477 }
478
479 fn compare_apollo(&self) -> Result<Value, Box<dyn Error>> {
481 let (data1, data2) = match (&self.data1, &self.data2) {
482 (Some(data1), Some(data2)) => (data1, data2),
483 _ => return Err(From::from(format!("Both GFF sets need to be initialized"))),
484 };
485 let mut issues: Vec<String> = self.issues.clone();
486 let mut changes: Vec<Value> = vec![];
487
488 let _re = Regex::new(r"-\d+$").unwrap();
489
490 for (_id, apollo_element) in data2 {
491 let _attrs = apollo_element.attributes();
492 let original_id = match self.infer_original_id_from_apollo(
493 data1,
494 data2,
495 &apollo_element,
496 &mut issues,
497 ) {
498 Some(id) => id,
499 None => {
500 issues.push(format!("No original ID found for {:?}", apollo_element));
501 continue;
502 }
503 };
504 let original_parent_id = match data1.get(&original_id) {
505 Some(e) => e.attributes().get("Parent"),
506 None => None,
507 };
508 let original_element = match data1.get(&original_id) {
509 Some(e) => e,
510 None => {
511 issues.push(format!("No original element found for {}", &original_id));
512 continue;
513 }
514 };
515
516 match (
518 original_parent_id,
519 original_element.attributes().get("Parent"),
520 ) {
521 (Some(apollo_opid), Some(original_opid)) => {
522 if *apollo_opid != *original_opid {
523 let j = json!({ "action":"update" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : apollo_opid } );
524 changes.push(j);
525 let j = json!({ "action":"remove" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : original_opid } );
526 changes.push(j);
527 }
528 }
529 (Some(apollo_opid), None) => {
530 let j = json!({ "action":"add" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : apollo_opid } );
531 changes.push(j);
532 }
533 (None, Some(original_opid)) => {
534 let _j = json!({ "action":"remove" , "what":"attribute" , "id" : original_id , "key":"Parent" , "value" : original_opid } );
535 }
537 (None, None) => {}
538 }
539
540 self.compare_basics(&original_element, &apollo_element, original_id.as_str())
541 .drain(..)
542 .filter(|change| {
543 match (
544 change["action"].as_str(),
545 change["key"].as_str(),
546 change["value"].as_str(),
547 ) {
548 (Some("update"), Some("source"), Some(".")) => false,
549 _ => true, }
551 })
552 .for_each(|change| changes.push(change));
553 }
554
555 Ok(match self.record_issues {
556 true => json!({"changes": changes, "issues": issues}),
557 false => json!({ "changes": changes }),
558 })
559 }
560
561 fn gff_from_json(j: &Value) -> Result<bio::io::gff::Record, String> {
562 let mut ret = bio::io::gff::Record::new();
563 Self::apply_diff_row_update(&json!({"key":"seqname","value":&j["seqname"]}), &mut ret)
564 .ignore();
565 Self::apply_diff_row_update(&json!({"key":"source","value":&j["source"]}), &mut ret)
566 .ignore();
567 Self::apply_diff_row_update(
568 &json!({"key":"feature_type","value":&j["feature_type"]}),
569 &mut ret,
570 )
571 .ignore();
572 Self::apply_diff_row_update(&json!({"key":"start","value":&j["start"]}), &mut ret).ignore();
573 Self::apply_diff_row_update(&json!({"key":"end","value":&j["end"]}), &mut ret).ignore();
574 Self::apply_diff_row_update(&json!({"key":"score","value":&j["score"]}), &mut ret).ignore();
575 Self::apply_diff_row_update(&json!({"key":"strand","value":&j["strand"]}), &mut ret)
576 .ignore();
577 Self::apply_diff_row_update(&json!({"key":"frame","value":&j["frame"]}), &mut ret).ignore();
578
579 match j["attributes"].as_object() {
581 Some(attributes) => {
582 attributes.iter().for_each(|(key, values)| {
583 match values.as_array() {
584 Some(values) => {
585 values
586 .iter()
587 .filter_map(|value| value.as_str())
588 .for_each(|value| {
589 Self::apply_diff_attribute_add(
590 &mut ret,
591 key.to_string(),
592 value.to_string(),
593 )
594 .ignore();
595 })
596 }
597 None => {} }
599 });
600 }
601 None => {} }
603
604 Ok(ret)
605 }
606
607 fn apply_diff_row_remove(change: &Value, data: &mut HashGFF) -> Result<(), String> {
608 match change["id"].as_str() {
609 Some(id) => {
610 data.remove(id);
611 }
612 None => return Err(format!("apply_diff_row_remove: add row, but no id set")),
613 }
614 Ok(())
615 }
616
617 fn apply_diff_row_update(
618 change: &Value,
619 element: &mut bio::io::gff::Record,
620 ) -> Result<(), String> {
621 let value = match change["value"].as_str() {
622 Some(v) => v.to_string(),
623 None => match change["value"].as_i64() {
624 Some(v) => v.to_string(),
625 None => return Err(format!("apply_diff_row_update: No value in {}", &change)),
626 },
627 };
628 match change["key"].as_str() {
629 Some("seqname") => *element.seqname_mut() = value,
630 Some("source") => *element.source_mut() = value,
631 Some("feature_type") => *element.feature_type_mut() = value,
632 Some("start") => *element.start_mut() = value.parse::<u64>().unwrap(),
633 Some("end") => *element.end_mut() = value.parse::<u64>().unwrap(),
634 Some("score") => *element.score_mut() = value,
635 Some("strand") => *element.strand_mut() = value,
636 Some("frame") => *element.frame_mut() = value,
637 _ => {
638 return Err(format!(
639 "apply_diff_row_update: Unknown/missing 'key' in {}",
640 change
641 ))
642 }
643 }
644 Ok(())
645 }
646
647 fn apply_diff_row_add_or_update(change: &Value, data: &mut HashGFF) -> Result<(), String> {
648 match change["id"].as_str() {
649 Some(id) => {
650 let action = change["action"].as_str().unwrap_or("");
651 let element = match action {
652 "add" => data
653 .entry(id.to_string())
654 .or_insert(Self::gff_from_json(&change["added_element"])?),
655 _ => match data.get_mut(&id.to_string()) {
656 Some(e) => e,
657 None => return Err(format!(
658 "apply_diff_row_add_or_update: {} ID {} does not appear in data set",
659 action, &id
660 )),
661 },
662 };
663 if action == "update" {
664 Self::apply_diff_row_update(change, element)?;
665 }
666 Ok(())
667 }
668 None => Err(format!(
669 "apply_diff_row_add_or_update: Missing 'id' in {:?}",
670 change
671 )),
672 }
673 }
674
675 fn apply_diff_rows(
676 changes: &Vec<Value>,
677 data: &mut HashGFF,
678 action: &str,
679 ) -> Result<(), Box<dyn Error>> {
680 match changes
681 .iter()
682 .filter(|change| change["what"].as_str().unwrap_or("") == "row")
683 .filter(|change| change["action"].as_str().unwrap_or("") == action)
684 .map(|change| match change["action"].as_str() {
685 Some("remove") => Self::apply_diff_row_remove(change, data),
686 Some("update") | Some("add") => Self::apply_diff_row_add_or_update(change, data),
687 Some(other) => Err(format!(
688 "apply_diff_rows: Unknown action {} in {:?}",
689 other, change
690 )),
691 _ => Err(format!("apply_diff_rows: No action in {:?}", change)),
692 })
693 .filter_map(|r| match r {
694 Ok(_) => None,
695 Err(e) => Some(e),
696 })
697 .nth(0)
698 {
699 Some(err) => Err(From::from(err)),
700 None => Ok(()),
701 }
702 }
703
704 fn apply_diff_attribute_add(
705 gff: &mut bio::io::gff::Record,
706 key: String,
707 value: String,
708 ) -> Result<(), String> {
709 gff.attributes_mut().insert(key, value);
710 Ok(())
711 }
712
713 fn apply_diff_attribute_remove(
714 gff: &mut bio::io::gff::Record,
715 key: String,
716 value: String,
717 ) -> Result<(), String> {
718 match gff.attributes_mut().get_vec_mut(&key) {
719 Some(v) => {
720 v.retain(|x| *x != value);
721 Ok(())
722 }
723 None => Err(format!(
724 "No attribute {}/{} to remove from {:?}",
725 key, value, gff
726 )),
727 }
728 }
729
730 fn apply_diff_attributes(
731 changes: &Vec<Value>,
732 data: &mut HashGFF,
733 action: &str,
734 ) -> Result<(), Box<dyn Error>> {
735 match changes
736 .iter()
737 .filter(|change| change["what"].as_str().unwrap_or("") == "attribute")
738 .filter(|change| change["action"].as_str().unwrap_or("") == action)
739 .map(|change| {
740 let id = match change["id"].as_str() {
741 Some(id) => id.to_string(),
742 None => return Err(format!("apply_diff_attributes: No ID given: {}", change)),
743 };
744 let element = match data.get_mut(&id) {
745 Some(e) => e,
746 None => {
747 return Err(format!(
748 "apply_diff_attributes: ID {} not found in GFF1",
749 id
750 ))
751 }
752 };
753 let key = match change["key"].as_str() {
754 Some(s) => s.to_string(),
755 None => return Err(format!("apply_diff_attributes: No key given: {}", change)),
756 };
757 let value = match change["value"].as_str() {
758 Some(s) => s.to_string(),
759 None => {
760 return Err(format!("apply_diff_attributes: No value given: {}", change))
761 }
762 };
763 match change["action"].as_str() {
764 Some("add") => Self::apply_diff_attribute_add(element, key, value),
765 Some("remove") => Self::apply_diff_attribute_remove(element, key, value),
766 Some(other) => Err(format!(
767 "apply_diff_attributes: Unknown action {} in {:?}",
768 other, change
769 )),
770 _ => Err(format!("apply_diff_attributes: No action in {:?}", change)),
771 }
772 })
773 .filter_map(|r| match r {
774 Ok(_) => None,
775 Err(e) => Some(e),
776 })
777 .nth(0)
778 {
779 Some(err) => Err(From::from(err)),
780 None => Ok(()),
781 }
782 }
783
784 pub fn apply_diff(&mut self, diff: &Value) -> Result<&HashGFF, Box<dyn Error>> {
786 let changes = match diff["changes"].as_array() {
787 Some(changes) => changes,
788 None => return Err(From::from(format!("No changes in diff"))),
789 };
790 let data = match self.data1.as_mut() {
791 Some(data) => data,
792 _ => return Err(From::from(format!("GFF set 1 needs to be initialized"))),
793 };
794 Self::apply_diff_rows(&changes, data, "remove")?;
795 Self::apply_diff_rows(&changes, data, "add")?;
796 Self::apply_diff_rows(&changes, data, "update")?;
797 Self::apply_diff_attributes(&changes, data, "remove")?;
798 Self::apply_diff_attributes(&changes, data, "add")?;
799 Self::apply_diff_attributes(&changes, data, "update")?;
800 Ok(data)
801 }
802}
803
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Runs the fixture in `test/<dir>`: diffs `core.gff` against `cap.gff`,
    /// checks the sorted changes against `expected.json`, then applies the
    /// diff and checks that a second diff reports no changes.
    fn compare_expected(dir: &str) {
        let core_path = format!("test/{}/core.gff", dir);
        let cap_path = format!("test/{}/cap.gff", dir);
        let expected_path = format!("test/{}/expected.json", dir);

        let mut comparer = CompareGFF::new();
        comparer.record_issues(true);
        comparer.load_gff(core_path, 1).unwrap();
        comparer.load_gff(cap_path, 2).unwrap();

        let mut actual = comparer.diff().unwrap();
        let expected_text = fs::read_to_string(expected_path).unwrap();
        let mut expected: Value = serde_json::from_str(&expected_text).unwrap();
        CompareGFF::sort_comparison(&mut actual);
        CompareGFF::sort_comparison(&mut expected);
        assert_eq!(actual["changes"], expected["changes"]);

        comparer.apply_diff(&actual).unwrap();
        let remaining = comparer.diff().unwrap();
        assert_eq!(remaining["changes"].as_array().unwrap().len(), 0);
    }

    /// Generates one `#[test]` per fixture directory; the test name is the
    /// directory name.
    macro_rules! fixture_tests {
        ($($name:ident),*) => {
            $(
                #[test]
                fn $name() {
                    compare_expected(stringify!($name));
                }
            )*
        };
    }

    #[test]
    fn attribute_added() {
        let id = String::from("the_id");
        let key = String::from("the_key");
        let values: Vec<String> = ["value1", "value3"]
            .iter()
            .map(|value| value.to_string())
            .collect();

        let mut attrs = MultiMap::new();
        for value in ["value1", "value2", "value3"].iter() {
            attrs.insert("the_key".to_string(), value.to_string());
        }

        let mut result = json!({ "changes": [] });
        let result = Arc::new(Mutex::new(&mut result));

        CompareGFF::new().compare_attributes(
            &id,
            &key,
            &values,
            &attrs,
            CompareMode::Forward,
            &result,
        );

        let expected = json!({ "changes": [ { "action": "add", "what": "attribute", "id": id, "key": key, "value": "value2" } ] });
        assert_eq!(**result.lock().unwrap(), expected);
    }

    fixture_tests!(
        added_exon,
        alter_exon,
        gene_in_intron,
        gene_merge,
        gene_split,
        gene_validation_3exons_ok,
        gene_validation_no_start,
        gene_validation_no_stop,
        gene_validation_ok,
        iso_form,
        iso_form_lost,
        new_gene,
        no_change,
        no_change_isoforms,
        remove_exon,
        utr_change
    );
}