1use crate::GeneId;
16use serde::{Deserialize, Serialize};
17use std::fmt;
18
19#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
20#[serde(tag = "op", rename_all = "lowercase")]
21pub enum Gpr {
22 Gene { id: GeneId },
23 And { operands: Vec<Gpr> },
24 Or { operands: Vec<Gpr> },
25}
26
27impl Gpr {
28 pub fn gene(id: impl Into<GeneId>) -> Self {
29 Gpr::Gene { id: id.into() }
30 }
31
32 pub fn normalize(self) -> Self {
34 match self {
35 g @ Gpr::Gene { .. } => g,
36 Gpr::And { operands } => {
37 let mut out = Vec::with_capacity(operands.len());
38 for op in operands {
39 match op.normalize() {
40 Gpr::And { operands: inner } => out.extend(inner),
41 other => out.push(other),
42 }
43 }
44 match out.len() {
45 0 => Gpr::And { operands: vec![] },
46 1 => out.into_iter().next().unwrap(),
47 _ => Gpr::And { operands: out },
48 }
49 }
50 Gpr::Or { operands } => {
51 let mut out = Vec::with_capacity(operands.len());
52 for op in operands {
53 match op.normalize() {
54 Gpr::Or { operands: inner } => out.extend(inner),
55 other => out.push(other),
56 }
57 }
58 match out.len() {
59 0 => Gpr::Or { operands: vec![] },
60 1 => out.into_iter().next().unwrap(),
61 _ => Gpr::Or { operands: out },
62 }
63 }
64 }
65 }
66
67 pub fn collect_genes(&self, out: &mut Vec<GeneId>) {
69 match self {
70 Gpr::Gene { id } => {
71 if !out.contains(id) {
72 out.push(id.clone());
73 }
74 }
75 Gpr::And { operands } | Gpr::Or { operands } => {
76 for op in operands {
77 op.collect_genes(out);
78 }
79 }
80 }
81 }
82}
83
84impl fmt::Display for Gpr {
85 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
86 match self {
87 Gpr::Gene { id } => f.write_str(id.as_str()),
88 Gpr::And { operands } => {
89 for (i, op) in operands.iter().enumerate() {
90 if i > 0 {
91 f.write_str(" and ")?;
92 }
93 write_grouped(f, op, matches!(op, Gpr::Or { .. }))?;
94 }
95 Ok(())
96 }
97 Gpr::Or { operands } => {
98 for (i, op) in operands.iter().enumerate() {
99 if i > 0 {
100 f.write_str(" or ")?;
101 }
102 write_grouped(f, op, false)?;
103 }
104 Ok(())
105 }
106 }
107 }
108}
109
110fn write_grouped(f: &mut fmt::Formatter<'_>, g: &Gpr, paren: bool) -> fmt::Result {
111 if paren {
112 write!(f, "({})", g)
113 } else {
114 write!(f, "{}", g)
115 }
116}
117
118#[derive(Debug, thiserror::Error)]
119pub enum GprParseError {
120 #[error("unexpected character '{ch}' at position {pos}")]
121 UnexpectedChar { ch: char, pos: usize },
122 #[error("unexpected end of input at position {pos}")]
123 UnexpectedEnd { pos: usize },
124 #[error("unclosed parenthesis opened at position {pos}")]
125 UnclosedParen { pos: usize },
126 #[error("empty expression")]
127 Empty,
128}
129
130impl std::str::FromStr for Gpr {
140 type Err = GprParseError;
141 fn from_str(s: &str) -> Result<Self, Self::Err> {
142 let tokens = tokenize(s)?;
143 if tokens.is_empty() {
144 return Err(GprParseError::Empty);
145 }
146 let mut parser = Parser { tokens: &tokens, pos: 0 };
147 let expr = parser.parse_or()?;
148 if parser.pos != tokens.len() {
149 let (bad_pos, _) = tokens[parser.pos];
150 return Err(GprParseError::UnexpectedEnd { pos: bad_pos });
151 }
152 Ok(expr.normalize())
153 }
154}
155
156#[derive(Clone, Debug, Eq, PartialEq)]
157enum Tok {
158 LParen,
159 RParen,
160 And,
161 Or,
162 Ident(String),
163}
164
165fn tokenize(s: &str) -> Result<Vec<(usize, Tok)>, GprParseError> {
166 let mut out = Vec::new();
167 let bytes = s.as_bytes();
168 let mut i = 0;
169 while i < bytes.len() {
170 let c = bytes[i] as char;
171 match c {
172 ' ' | '\t' | '\n' | '\r' => {
173 i += 1;
174 }
175 '(' => {
176 out.push((i, Tok::LParen));
177 i += 1;
178 }
179 ')' => {
180 out.push((i, Tok::RParen));
181 i += 1;
182 }
183 '&' => {
184 out.push((i, Tok::And));
185 i += 1;
186 }
187 '|' => {
188 out.push((i, Tok::Or));
189 i += 1;
190 }
191 '\'' | '"' => {
192 let quote = c;
193 let start = i + 1;
194 i += 1;
195 while i < bytes.len() && (bytes[i] as char) != quote {
196 i += 1;
197 }
198 if i >= bytes.len() {
199 return Err(GprParseError::UnexpectedEnd { pos: start });
200 }
201 let id: String = s[start..i].to_string();
202 out.push((start, Tok::Ident(id)));
203 i += 1; }
205 ch if is_ident_start(ch) => {
206 let start = i;
207 while i < bytes.len() && is_ident_cont(bytes[i] as char) {
208 i += 1;
209 }
210 let slice = &s[start..i];
211 let lower = slice.to_ascii_lowercase();
212 let tok = match lower.as_str() {
213 "and" => Tok::And,
214 "or" => Tok::Or,
215 _ => Tok::Ident(slice.to_string()),
216 };
217 out.push((start, tok));
218 }
219 other => return Err(GprParseError::UnexpectedChar { ch: other, pos: i }),
220 }
221 }
222 Ok(out)
223}
224
225fn is_ident_start(c: char) -> bool {
226 c.is_ascii_alphabetic() || c == '_'
227}
228
229fn is_ident_cont(c: char) -> bool {
230 c.is_ascii_alphanumeric() || matches!(c, '_' | '.' | '-' | ':' | '+')
231}
232
233struct Parser<'a> {
234 tokens: &'a [(usize, Tok)],
235 pos: usize,
236}
237
238impl<'a> Parser<'a> {
239 fn peek(&self) -> Option<&'a Tok> {
240 self.tokens.get(self.pos).map(|(_, t)| t)
241 }
242 fn peek_pos(&self) -> usize {
243 self.tokens
244 .get(self.pos)
245 .map(|(p, _)| *p)
246 .unwrap_or_else(|| self.tokens.last().map(|(p, _)| *p + 1).unwrap_or(0))
247 }
248 fn bump(&mut self) -> &'a Tok {
249 let t = &self.tokens[self.pos].1;
250 self.pos += 1;
251 t
252 }
253
254 fn parse_or(&mut self) -> Result<Gpr, GprParseError> {
255 let mut left = self.parse_and()?;
256 while matches!(self.peek(), Some(Tok::Or)) {
257 self.bump();
258 let right = self.parse_and()?;
259 left = match left {
260 Gpr::Or { mut operands } => {
261 operands.push(right);
262 Gpr::Or { operands }
263 }
264 other => Gpr::Or { operands: vec![other, right] },
265 };
266 }
267 Ok(left)
268 }
269
270 fn parse_and(&mut self) -> Result<Gpr, GprParseError> {
271 let mut left = self.parse_atom()?;
272 while matches!(self.peek(), Some(Tok::And)) {
273 self.bump();
274 let right = self.parse_atom()?;
275 left = match left {
276 Gpr::And { mut operands } => {
277 operands.push(right);
278 Gpr::And { operands }
279 }
280 other => Gpr::And { operands: vec![other, right] },
281 };
282 }
283 Ok(left)
284 }
285
286 fn parse_atom(&mut self) -> Result<Gpr, GprParseError> {
287 match self.peek() {
288 Some(Tok::LParen) => {
289 let open_pos = self.peek_pos();
290 self.bump();
291 let expr = self.parse_or()?;
292 match self.peek() {
293 Some(Tok::RParen) => {
294 self.bump();
295 Ok(expr)
296 }
297 _ => Err(GprParseError::UnclosedParen { pos: open_pos }),
298 }
299 }
300 Some(Tok::Ident(s)) => {
301 let id = s.clone();
302 self.bump();
303 Ok(Gpr::gene(id))
304 }
305 Some(Tok::And) | Some(Tok::Or) | Some(Tok::RParen) => {
306 Err(GprParseError::UnexpectedChar { ch: '?', pos: self.peek_pos() })
307 }
308 None => Err(GprParseError::UnexpectedEnd { pos: self.peek_pos() }),
309 }
310 }
311}
312
313#[cfg(test)]
314mod tests {
315 use super::*;
316 use std::str::FromStr;
317
318 #[test]
319 fn single_gene() {
320 let g: Gpr = "b0001".parse().unwrap();
321 assert_eq!(g, Gpr::gene("b0001"));
322 }
323
324 #[test]
325 fn precedence_and_binds_tighter() {
326 let g: Gpr = "a and b or c and d".parse().unwrap();
327 assert_eq!(
328 g,
329 Gpr::Or {
330 operands: vec![
331 Gpr::And { operands: vec![Gpr::gene("a"), Gpr::gene("b")] },
332 Gpr::And { operands: vec![Gpr::gene("c"), Gpr::gene("d")] },
333 ]
334 }
335 );
336 }
337
338 #[test]
339 fn parens_override_precedence() {
340 let g: Gpr = "(a or b) and (c or d)".parse().unwrap();
341 assert_eq!(
342 g,
343 Gpr::And {
344 operands: vec![
345 Gpr::Or { operands: vec![Gpr::gene("a"), Gpr::gene("b")] },
346 Gpr::Or { operands: vec![Gpr::gene("c"), Gpr::gene("d")] },
347 ]
348 }
349 );
350 }
351
352 #[test]
353 fn symbolic_operators() {
354 let g: Gpr = "a & (b | c)".parse().unwrap();
355 assert_eq!(
356 g,
357 Gpr::And {
358 operands: vec![
359 Gpr::gene("a"),
360 Gpr::Or { operands: vec![Gpr::gene("b"), Gpr::gene("c")] },
361 ]
362 }
363 );
364 }
365
366 #[test]
367 fn collect_genes_unique() {
368 let g: Gpr = "a and (b or a or c)".parse().unwrap();
369 let mut v = vec![];
370 g.collect_genes(&mut v);
371 assert_eq!(
372 v.iter().map(|g| g.as_str().to_string()).collect::<Vec<_>>(),
373 vec!["a", "b", "c"]
374 );
375 }
376
377 #[test]
378 fn display_roundtrip() {
379 let g: Gpr = "a and (b or c)".parse().unwrap();
380 let s = g.to_string();
381 let g2: Gpr = s.parse().unwrap();
382 assert_eq!(g, g2);
383 }
384
385 #[test]
386 fn empty_is_error() {
387 assert!(matches!(Gpr::from_str(" ").unwrap_err(), GprParseError::Empty));
388 }
389
390 #[test]
391 fn unclosed_paren_is_error() {
392 assert!(matches!(
393 Gpr::from_str("a and (b or c").unwrap_err(),
394 GprParseError::UnclosedParen { .. }
395 ));
396 }
397}