tsg_core/graph/
edge.rs

1use std::str::FromStr;
2use std::{fmt, io};
3
4use ahash::HashMap;
5use anyhow::Result;
6use bon::Builder;
7use bstr::{BString, ByteVec};
8
9use super::Attribute;
10
11/// Represents a structural variant in a genomic sequence.
12///
13/// A structural variant describes a genomic rearrangement between two locations,
14/// potentially on different reference sequences.
15///
16/// # Fields
17///
18/// * `reference_name1` - The name of the first reference sequence.
19/// * `reference_name2` - The name of the second reference sequence.
20/// * `breakpoint1` - The position on the first reference sequence where the variant occurs.
21/// * `breakpoint2` - The position on the second reference sequence where the variant occurs.
22/// * `sv_type` - The type of structural variant (e.g., "DEL", "INV", "DUP", "TRA").
23///
24/// # Examples
25///
26/// ```
27/// use bstr::BString;
28/// use tsg_core::graph::StructuralVariant;
29///
30/// let sv = StructuralVariant {
31///     reference_name1: BString::from("chr1"),
32///     reference_name2: BString::from("chr1"),
33///     breakpoint1: 1000,
34///     breakpoint2: 5000,
35///     sv_type: BString::from("DEL"),
36/// };
37///
38/// let sv_from_builder = StructuralVariant::builder()
39///    .reference_name1("chr1")
40///    .reference_name2("chr1")
41///    .breakpoint1(1000)
42///    .breakpoint2(5000)
43///    .sv_type(BString::from("DEL"))
44///    .build();
45/// ```
46#[derive(Debug, Builder, Clone, Default)]
47#[builder(on(BString, into))]
48pub struct StructuralVariant {
49    pub reference_name1: BString,
50    pub reference_name2: BString,
51    pub breakpoint1: usize,
52    pub breakpoint2: usize,
53    pub sv_type: BString,
54}
55
56impl FromStr for StructuralVariant {
57    type Err = io::Error;
58
59    fn from_str(s: &str) -> Result<Self, Self::Err> {
60        // E  <id>  <source_id>  <sink_id>  <SV>
61        let parts: Vec<&str> = s.split(',').collect();
62        if parts.len() != 5 {
63            return Err(io::Error::new(
64                io::ErrorKind::InvalidData,
65                format!("Invalid SV format: {}", s),
66            ));
67        }
68
69        let breakpoint1 = parts[2].parse::<usize>().map_err(|e| {
70            io::Error::new(
71                io::ErrorKind::InvalidData,
72                format!("Invalid breakpoint1: {}", e),
73            )
74        })?;
75
76        let breakpoint2 = parts[3].parse::<usize>().map_err(|e| {
77            io::Error::new(
78                io::ErrorKind::InvalidData,
79                format!("Invalid breakpoint2: {}", e),
80            )
81        })?;
82
83        Ok(StructuralVariant {
84            reference_name1: parts[0].into(),
85            reference_name2: parts[1].into(),
86            breakpoint1,
87            breakpoint2,
88            sv_type: parts[4].into(),
89        })
90    }
91}
92
93impl fmt::Display for StructuralVariant {
94    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95        write!(
96            f,
97            "{},{},{},{},{}",
98            self.reference_name1,
99            self.reference_name2,
100            self.breakpoint1,
101            self.breakpoint2,
102            self.sv_type
103        )
104    }
105}
106
107/// Represents an edge in a transcript segment graph.
108///
109/// Each edge contains a structural variant and additional attributes.
110///
111/// # Fields
112///
113/// * `id` - The unique identifier for this edge.
114/// * `sv` - The structural variant associated with this edge.
115/// * `attributes` - A collection of additional attributes for this edge.
116///
117#[derive(Debug, Clone, Builder, Default)]
118#[builder(on(BString, into))]
119pub struct EdgeData {
120    pub id: BString,
121    pub sv: StructuralVariant,
122    #[builder(default)]
123    pub attributes: HashMap<BString, Attribute>,
124}
125
126impl EdgeData {
127    pub fn to_vcf(&self, attributes: Option<&[Attribute]>) -> Result<BString> {
128        let mut vcf = BString::from("");
129        vcf.push_str(format!(
130            "{}\t{}\t{}\t.\t<{}>\t.\t.\tSVTYPE={};CHR2={};SVEND={};",
131            self.sv.reference_name1,
132            self.sv.breakpoint1,
133            self.id,
134            self.sv.sv_type,
135            self.sv.sv_type,
136            self.sv.reference_name2,
137            self.sv.breakpoint2,
138        ));
139
140        let mut info = BString::from("");
141        for attr in self.attributes.values() {
142            info.push_str(format!("{}={};", attr.tag, attr.value));
143        }
144
145        if let Some(attributes) = attributes {
146            for attr in attributes.iter() {
147                info.push_str(format!("{}={};", attr.tag, attr.value));
148            }
149        }
150
151        vcf.push_str(&info);
152        vcf.push_str("\tGT\t0/1");
153        Ok(vcf)
154    }
155}