1use std::{collections::BTreeMap, marker::PhantomData};
2
3pub use field_expr::{
4 BinaryParam, ComponentFieldRefParam, CondParam, ConstParam, ConstValue, FieldExpression,
5 FieldRefParam, RecordRefParam, StringRepr, UnaryParam,
6};
7use serde::{Deserialize, Serialize};
8
9use strum::AsRefStr;
10
11mod field_expr;
12
13#[derive(Serialize, Deserialize, Debug, Clone)]
15pub struct ConstBagRef<T> {
16 const_bag_key: usize,
17 #[serde(skip)]
18 _phantom_data: PhantomData<T>,
19}
20
21impl<T> ConstBagRef<T> {
22 pub fn get_const_bag_ident(&self) -> String {
23 format!("__CONST_BAG_VALUE_{}", self.const_bag_key)
24 }
25}
26
27#[derive(Serialize, Deserialize, Debug, Clone)]
28#[serde(untagged)]
29pub enum ConstOrEnv<T> {
30 Const(T),
31 Env(ConstBagRef<T>),
32}
33
34#[derive(Serialize, Deserialize, Debug, Clone, AsRefStr)]
35#[serde(tag = "opcode")]
36pub enum GrassIR {
37 CastToBed(CastToBedParam),
39 Let(LetBinding),
41 Ref(RefParam),
43 Open(OpenParam),
45 WriteFile(WriteFileParam),
47 Alter(AlterParam),
49 Filter(FilterParam),
51 MergeOverlap(MergeOverlapParam),
53 Intersection(IntersectParam),
55 Format(FormatParam),
57 GroupBy(GroupByParam),
59
60 AssumeSorted(AssumeSortedParam),
61
62 InlineRust(InlineRustParam),
63
64 LoadGenomeFile(LoadGenomeFileParam),
65
66 SortedRandom(SortedRandomParam),
67
68 InternalSort(InternalSortParam),
69
70 Invert(InvertParam),
71
72 AssignTag(AssignTagParam),
73
74 TwoWayMerge(TwoWayMergeParam),
75
76 Limit(LimitParam),
77
78 Nop(NopParam),
79}
80
81#[derive(Serialize, Deserialize, Debug, Clone)]
82pub struct LimitParam {
83 pub what: Box<GrassIR>,
84 pub count: ConstOrEnv<f64>,
85}
86
87#[derive(Serialize, Deserialize, Debug, Clone)]
88pub struct TwoWayMergeParam {
89 pub expr_1: Box<GrassIR>,
90 pub expr_2: Box<GrassIR>,
91}
92
93#[derive(Serialize, Deserialize, Debug, Clone)]
94#[serde(untagged)]
95pub enum TagValue {
96 String(String),
97 Int(i64),
98 Float(f64),
99}
100
101#[derive(Serialize, Deserialize, Debug, Clone)]
102pub struct AssignTagParam {
103 pub inner: Box<GrassIR>,
104 pub tag: TagValue,
105}
106
107#[derive(Serialize, Deserialize, Debug, Clone)]
108pub struct InvertParam {
109 pub inner: Box<GrassIR>,
110}
111
112#[derive(Serialize, Deserialize, Debug, Clone)]
113pub struct InternalSortParam {
114 pub inner: Box<GrassIR>,
115}
116
117#[derive(Serialize, Deserialize, Debug, Clone)]
118pub struct NopParam {
119 pub inner: Box<GrassIR>,
120}
121
122#[derive(Serialize, Deserialize, Debug, Clone)]
123pub struct SortedRandomParam {
124 pub count: ConstOrEnv<usize>,
125 pub min_length: ConstOrEnv<u32>,
126 pub max_length: ConstOrEnv<u32>,
127}
128
129#[derive(Serialize, Deserialize, Debug, Clone)]
130pub enum LoadGenomeFileParam {
131 File(ConstOrEnv<String>),
132}
133
134#[derive(Serialize, Deserialize, Debug, Clone)]
135pub struct InlineRustParam {
136 pub env: BTreeMap<String, GrassIR>,
137 pub src: String,
138}
139
140#[derive(Serialize, Deserialize, Debug, Clone)]
141pub struct RefParam {
142 pub id: String,
144}
145
146#[derive(Serialize, Deserialize, Debug, Clone)]
147pub struct GroupByParam {
148 #[serde(rename = "inner")]
150 pub expr: Box<GrassIR>,
151 pub keys: Vec<FieldExpression>,
153}
154
155#[derive(Serialize, Deserialize, Debug, Clone)]
156pub struct FormatParam {
157 #[serde(rename = "inner")]
159 pub expr: Box<GrassIR>,
160 pub fmt_str: String,
162 pub values: BTreeMap<String, FieldExpression>,
164}
165
166#[derive(Serialize, Deserialize, Debug, Clone)]
167pub enum IntersectFlavor {
168 #[serde(rename = "inner")]
169 Inner,
170 #[serde(rename = "outer")]
171 Outer,
172 #[serde(rename = "left-outer")]
173 LeftOuter,
174 #[serde(rename = "right-outer")]
175 RightOuter,
176}
177
178#[derive(Serialize, Deserialize, Debug, Clone)]
179pub struct IntersectParam {
180 pub flavor: IntersectFlavor,
182 pub lhs: Box<GrassIR>,
184 pub rhs: Box<GrassIR>,
186 pub sorted: bool,
188}
189
190#[derive(Serialize, Deserialize, Debug, Clone)]
191pub struct MergeOverlapParam {
192 #[serde(rename = "inner")]
193 pub input_expr: Box<GrassIR>,
194}
195
196#[derive(Serialize, Deserialize, Debug, Clone)]
197pub struct FilterParam {
198 #[serde(rename = "inner")]
200 pub input_expr: Box<GrassIR>,
201 pub cond: FieldExpression,
203}
204
205#[derive(Serialize, Deserialize, Debug, Clone)]
206pub struct AlterParam {
207 #[serde(rename = "inner")]
209 pub original_expr: Box<GrassIR>,
210 pub field: String,
212 pub value: FieldExpression,
214 pub sorted: bool,
215}
216
217#[derive(Serialize, Deserialize, Debug, Clone)]
218pub struct AssumeSortedParam {
219 pub inner: Box<GrassIR>,
220}
221
222#[derive(Serialize, Deserialize, Debug, Clone)]
223pub struct CastToBedParam {
224 pub inner: Box<GrassIR>,
225 pub num_of_fields: u32,
226 pub sorted: bool,
227}
228
229#[derive(Serialize, Deserialize, Debug, Clone)]
230pub enum InputFormat {
231 Bam,
232 Bed,
233 Cram,
234 Vcf,
235 Fasta,
236}
237
238#[derive(Serialize, Deserialize, Debug, Clone)]
239pub enum OpenTarget {
240 Path(ConstOrEnv<String>),
241 FileNo(u32),
242 CmdArg(u32),
243}
244
245#[derive(Serialize, Deserialize, Debug, Clone)]
246pub struct OpenParam {
247 pub target: OpenTarget,
249 pub format: InputFormat,
251 pub num_of_fields: i32,
253 pub compression: bool,
255 pub sorted: bool,
257}
258
259#[derive(Serialize, Deserialize, Debug, Clone)]
260#[serde(untagged)]
261pub enum WriteTarget {
262 Path(ConstOrEnv<String>),
263 FileNo(i32),
264}
265
266#[derive(Serialize, Deserialize, Debug, Clone)]
267pub struct WriteFileParam {
268 pub what: Box<GrassIR>,
270 pub target: WriteTarget,
272}
273
274#[derive(Serialize, Deserialize, Debug, Clone)]
275pub struct LetBinding {
276 pub id: String,
278 pub value: Box<GrassIR>,
280}
281
#[cfg(test)]
mod test {
    use std::{collections::BTreeMap, error::Error};

    use serde::{Deserialize, Serialize};
    use serde_json::from_str;

    use crate::GrassIR;

    /// Minimal untyped JSON model used to compare two documents structurally.
    ///
    /// All numbers are read as `f64`, so `1` and `1.0` compare equal, and
    /// object keys are held in a `BTreeMap`, so key order is irrelevant.
    #[derive(Serialize, Deserialize, PartialEq, Clone, Debug)]
    #[serde(untagged)]
    enum JsonValue {
        String(String),
        Number(f64),
        Boolean(bool),
        List(Vec<JsonValue>),
        Object(BTreeMap<String, JsonValue>),
        // Lets a fixture containing JSON `null` reach the comparison below
        // instead of panicking while the helper parses it.
        Null,
    }

    /// Asserts that serializing `obj` yields JSON structurally equal to `input`.
    ///
    /// # Panics
    ///
    /// Panics if either document cannot be parsed as a [`JsonValue`], if `obj`
    /// cannot be serialized, or if the two documents differ.
    fn validate_object<T: Serialize>(input: &str, obj: &T) {
        let expected: JsonValue = serde_json::from_str(input).unwrap();
        let serialized = serde_json::to_string(obj).unwrap();
        let actual: JsonValue = serde_json::from_str(&serialized).unwrap();
        assert_eq!(actual, expected);
    }

    /// Generates a round-trip test: the fixture at `$path` is parsed into a
    /// `GrassIR`, which must then serialize back to an equivalent document.
    macro_rules! parse_test {
        ($name: ident, $path : expr) => {
            #[test]
            fn $name() -> Result<(), Box<dyn Error>> {
                let input = include_str!($path);
                let data: GrassIR = from_str(input)?;
                validate_object(input, &data);
                Ok(())
            }
        };
    }

    parse_test!(parse_bam_to_bed, "../../data/ir/bam-to-bed.py.json");
    parse_test!(
        parse_expand_interval,
        "../../data/ir/expand-interval.py.json"
    );
    parse_test!(parse_filter, "../../data/ir/filter.py.json");
    parse_test!(parse_merge, "../../data/ir/merge.py.json");
    parse_test!(parse_slop, "../../data/ir/slop.py.json");
    parse_test!(
        parse_sorted_intersect_custom_format,
        "../../data/ir/sorted-intersect-custom-fmt.py.json"
    );
    parse_test!(
        parse_sorted_intersect_groupby,
        "../../data/ir/sorted-intersect-group.py.json"
    );
    parse_test!(
        parse_sorted_intersect_leftouter,
        "../../data/ir/sorted-intersect-leftouter.py.json"
    );
    parse_test!(
        parse_sorted_intersect_overlap_filter,
        "../../data/ir/sorted-intersect-overlap-filter.py.json"
    );
    parse_test!(
        parse_sorted_intersect,
        "../../data/ir/sorted-intersect.py.json"
    );
    parse_test!(parse_sorted_window, "../../data/ir/window.py.json");
}