aa_regex/
lib.rs

1/*! Macros for peptidic sequences regular expressions
2
 Collection of macros to help craft regular expressions that match peptidic sequences.
4
5 ## Usage
6
7 ```rust
8 use aa_regex::{any, any_of, except };
9
10let any = any!(); // => let any = "[ARNDCEQGHILKMFPSTWYV]";
11assert_eq!(any, "[ARNDCEQGHILKMFPSTWYV]");
12
13let any_aromatics = any_of!(W, F, Y); // => let any_aromatics = "[WFY]";
14assert_eq!(any_aromatics, "[WFY]");
15
16let no_proline = except!(P); // => let no_proline = "[ARNDCEQGHILKMFSTWYV]";
17assert_eq!(no_proline, "[ARNDCEQGHILKMFSTWYV]");
18
19let motif = concat!(any_of!(R, H, K), except!(P)); // => let motif = "[RHK][ARNDCEQGHILKMFSTWYV]";
20assert_eq!(motif, "[RHK][ARNDCEQGHILKMFSTWYV]")
21 ```
22*/
23// #![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
24// #![allow(dead_code)]
25// #![allow(clippy::missing_errors_doc)]
26
27use proc_macro::{Literal, TokenStream, TokenTree};
28use proc_macro2::Span;
29use proc_macro_error::{abort, proc_macro_error};
30use syn::{parse::Parse, parse_macro_input, punctuated::Punctuated, Ident, Token};
31
32#[derive(Debug)]
33struct AaInput(Vec<Ident>);
34
35impl Parse for AaInput {
36    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
37        let vars = Punctuated::<Ident, Token![,]>::parse_terminated(input)?;
38        Ok(Self(vars.into_iter().collect()))
39    }
40}
41
/// One-letter amino acid codes accepted by the macros, in the order used for
/// the generated character classes.
static AA_LIST: &[u8] = "ARNDCEQGHILKMFPSTWYV".as_bytes();
43
44/// Any amino acids
45///
46/// ## Usage
47///
48/// ```
49/// #[macro_use]
50///  use aa_regex::any;
51///
52/// let any = any!(); // => let any = "[ARNDCEQGHILKMFPSTWYV]";
53/// assert_eq!(any, "[ARNDCEQGHILKMFPSTWYV]");
54/// ```
55///
56/// ## Compilation errors
57///
58/// Compilation will fail if:
59///
60/// - arguments are given to the macro
61#[proc_macro]
62#[proc_macro_error]
63pub fn any(input: TokenStream) -> TokenStream {
64    let parsed = parse_macro_input!(input as AaInput);
65
66    if !parsed.0.is_empty() {
67        abort!(Span::call_site(), "No argument allowed")
68    }
69
70    // TODO check emty input
71    core::iter::once(TokenTree::Literal(Literal::string(
72        "[ARNDCEQGHILKMFPSTWYV]",
73    )))
74    .collect()
75}
76
77/// Any of the selected amino acids
78///
79/// ## Usage
80///
81/// ```
82/// #[macro_use]
83///  use aa_regex::any_of;
84///
85/// let one = any_of!(P);
86/// assert_eq!(one, "P");
87///
88/// let some = any_of!(C, D, E);
89/// assert_eq!(some, "[CDE]")
90/// ```
91///
92/// ## Compilation errors
93///
94/// Compilation will fail if:
95///
96/// - non amino acid characters are used
97/// - an amino acid was already added
98/// - no amino acid to add
99///  
100#[proc_macro]
101#[proc_macro_error]
102pub fn any_of(input: TokenStream) -> TokenStream {
103    let mut buffer = vec![b'['];
104    let mut counter = 1_usize;
105    let parsed = parse_macro_input!(input as AaInput);
106
107    if parsed.0.is_empty() {
108        abort!(Span::call_site(), "no amino acid to add")
109    }
110    for x in &parsed.0 {
111        if buffer.len() > 20 {
112            // 21 => 20 aa + [
113            abort!(x.span(), "Cannot add more amino acids")
114        }
115
116        let as_string = x.to_string().to_uppercase();
117
118        if as_string.len() > 1 {
119            abort!(x.span(), "Expected only one character"; help = "only 1-letter amino acid code are accepted");
120        }
121
122        let aa_to_add = as_string.as_bytes().first().unwrap();
123        if AA_LIST.contains(aa_to_add) {
124            if None == buffer.iter().position(|aa| aa == aa_to_add) {
125                buffer.push(*aa_to_add);
126                counter += 1;
127            } else {
128                abort!(x.span(), "Amino acid already added");
129            }
130        } else {
131            abort!(x.span(), "Not an amino acid");
132        }
133    }
134
135    // add the finishing ] or remove the starting  [
136    if counter > 2 {
137        buffer.push(b']');
138        counter += 1;
139    } else {
140        buffer.remove(0_usize);
141        counter -= 1;
142    }
143
144    core::iter::once(TokenTree::Literal(Literal::string(
145        core::str::from_utf8(&buffer[..counter]).unwrap(),
146    )))
147    .collect()
148}
149
150/// Except some amino acids
151///
152/// ## Usage
153///
154/// ```
155/// #[macro_use]
156///  use aa_regex::except;
157///
158/// let some = except!(C, D, E);
159/// ```
160///
161/// ## Compilation errors
162///
163/// Compilation will fail if:
164///
165/// - non amino acid characters are used
166/// - an amino acid was already removed
167/// - there are too many exceptions
168/// - there are no exceptions
169///
170#[proc_macro]
171#[proc_macro_error]
172pub fn except(input: TokenStream) -> TokenStream {
173    let mut buffer = vec![
174        b'[', b'A', b'R', b'N', b'D', b'C', b'E', b'Q', b'G', b'H', b'I', b'L', b'K', b'M', b'F',
175        b'P', b'S', b'T', b'W', b'Y', b'V', b']',
176    ];
177
178    let mut counter = 22_usize;
179
180    let parsed = parse_macro_input!(input as AaInput);
181
182    if parsed.0.is_empty() {
183        abort!(Span::call_site(), "no exceptions to add"; help = "add exceptions like `except!(P)` to avoid prolines for example")
184    }
185
186    for x in &parsed.0 {
187        if buffer.len() < 4 {
188            abort!(x.span(), "Cannot add more exceptions")
189        }
190        let as_string = x.to_string().to_uppercase();
191        if as_string.len() > 1 {
192            abort!(x.span(), "Expected only one character"; help = "only 1-letter amino acid code are accepted");
193        }
194        let aa_to_remove = as_string.as_bytes().first().unwrap();
195        if AA_LIST.contains(aa_to_remove) {
196            if let Some(position) = buffer.iter().position(|aa| aa == aa_to_remove) {
197                buffer.remove(position);
198                counter -= 1;
199            } else {
200                abort!(x.span(), "Exception already in place");
201            }
202        } else {
203            abort!(x.span(), "Not an amino acid");
204        }
205    }
206
207    core::iter::once(TokenTree::Literal(Literal::string(
208        core::str::from_utf8(&buffer[..counter]).unwrap(),
209    )))
210    .collect()
211}