1#![no_std]
2
3extern crate alloc;
4extern crate proc_macro;
5
6use alloc::{borrow::ToOwned, format, vec::Vec};
7
8use bstr::ByteSlice;
9use proc_macro::TokenStream;
10use proc_macro2::Span;
11use quote::{quote, ToTokens};
12use syn::{
13 parse::{Parse, ParseStream, Result},
14 parse_macro_input, parse_str, Expr, Ident, Lit, LitBool, LitByteStr, Token, TypePath,
15};
16
17struct Unformat {
18 pattern: Vec<u8>,
19 text: Expr,
20 is_pattern_str: bool,
21 full_match: bool,
22}
23
24impl Parse for Unformat {
25 fn parse(input: ParseStream) -> Result<Self> {
26 #[allow(clippy::wildcard_enum_match_arm)]
27 let (pattern, is_pattern_str) = match input.parse::<Lit>()? {
28 Lit::Str(str) => (str.value().into_bytes(), true),
29 Lit::ByteStr(byte_str) => (byte_str.value(), false),
30 _ => return Err(input.error("expected a string literal")),
31 };
32
33 input.parse::<Token![,]>()?;
34
35 let text = input.parse::<Expr>()?;
36
37 let full_match = if input.parse::<Token![,]>().is_ok() {
38 input.parse::<LitBool>().map_or(false, |bool| bool.value)
39 } else {
40 false
41 };
42 Ok(Self {
43 pattern,
44 text,
45 is_pattern_str,
46 full_match,
47 })
48 }
49}
50
51enum Assignee {
52 Index(u32),
53 Variable(Ident),
54}
55
56impl Assignee {
57 fn new(variable: &str, index: &mut u32) -> Self {
58 variable.parse::<u32>().map_or_else(
59 |_| {
60 if variable.is_empty() {
61 let tuple_index = *index;
62 *index = index.saturating_add(1);
63 Self::Index(tuple_index)
64 } else {
65 Self::Variable(parse_str(variable).expect("invalid variable name"))
66 }
67 },
68 Self::Index,
69 )
70 }
71}
72
73enum CaptureTypePath {
74 Str,
75 Bytes,
76 Typed(TypePath),
77}
78
79impl CaptureTypePath {
80 fn new(type_path: &str, is_pattern_str: bool) -> Self {
81 if type_path.is_empty() {
82 if is_pattern_str {
83 Self::Str
84 } else {
85 Self::Bytes
86 }
87 } else if type_path == "&str" {
88 Self::Str
89 } else if type_path == "&[u8]" {
90 Self::Bytes
91 } else {
92 Self::Typed(parse_str(type_path).expect("invalid type path"))
93 }
94 }
95}
96
97impl ToTokens for CaptureTypePath {
98 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
99 tokens.extend(match *self {
100 Self::Str => {
101 quote! { &str }
102 }
103 Self::Bytes => {
104 quote! { &[u8] }
105 }
106 Self::Typed(ref type_path) => {
107 quote! { #type_path }
108 }
109 });
110 }
111}
112
113struct Capture {
114 text: Vec<u8>,
115 assignee: Assignee,
116 r#type: CaptureTypePath,
117}
118
119impl Capture {
120 fn new(text: &[u8], capture: &str, is_pattern_str: bool, index: &mut u32) -> Self {
121 let (variable, type_path) = capture.split_once(':').unwrap_or((capture, ""));
122 Self {
123 text: text.to_vec(),
124 assignee: Assignee::new(variable, index),
125 r#type: CaptureTypePath::new(type_path, is_pattern_str),
126 }
127 }
128}
129
130impl ToTokens for Capture {
131 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
132 let rhs = match self.r#type {
133 CaptureTypePath::Str => {
134 quote! {
135 if let Ok(__unfmt_left) = __unfmt_left.to_str() {
136 __unfmt_left
137 } else {
138 break 'unformat None;
139 }
140 }
141 }
142 CaptureTypePath::Bytes => {
143 quote! { __unfmt_left }
144 }
145 CaptureTypePath::Typed(ref type_path) => {
146 quote! {
147 if let Ok(Ok(__unfmt_left)) = __unfmt_left.to_str().map(|value| value.parse::<#type_path>()) {
148 __unfmt_left
149 } else {
150 break 'unformat None;
151 }
152 }
153 }
154 };
155 let assignment = match self.assignee {
156 Assignee::Index(ref index) => {
157 let ident = Ident::new(&format!("__unfmt_capture_{index}"), Span::call_site());
158 quote! { let #ident = #rhs }
159 }
160 Assignee::Variable(ref ident) => {
161 quote! { #ident = Some(#rhs) }
162 }
163 };
164 let text = LitByteStr::new(&self.text, Span::call_site());
165
166 tokens.extend(if self.text.is_empty() {
172 quote! { let (__unfmt_left, __unfmt_right) = (__unfmt_byte_text, b""); }
173 } else {
174 quote! {
175 let Some((__unfmt_left, __unfmt_right)) = __unfmt_byte_text.split_once_str(#text) else {
176 break 'unformat None;
177 };
178 }
179 });
180
181 tokens.extend(quote! {
182 #assignment;
183 __unfmt_byte_text = BStr::new(__unfmt_right);
184 });
185 }
186}
187
188#[proc_macro]
208pub fn unformat(input: TokenStream) -> TokenStream {
209 let Unformat {
210 pattern,
211 text,
212 is_pattern_str,
213 full_match,
214 } = parse_macro_input!(input as Unformat);
215
216 let (initial_part, captures) = compile(&pattern, is_pattern_str);
217 let initial_part = Lit::ByteStr(LitByteStr::new(&initial_part, Span::call_site()));
218
219 let capture_idents = {
220 let mut capture_indices = captures
221 .iter()
222 .filter_map(|capture| match capture.assignee {
223 Assignee::Index(capture_index) => Some(capture_index),
224 Assignee::Variable(..) => None,
225 })
226 .collect::<Vec<_>>();
227
228 capture_indices.sort_by(|&index_a, &index_b| index_a.cmp(&index_b));
229
230 capture_indices
231 .into_iter()
232 .map(|index| Ident::new(&format!("__unfmt_capture_{index}"), Span::call_site()))
233 .collect::<Vec<_>>()
234 };
235
236 let capture_block = if full_match {
237 quote! {
238 if !__unfmt_left.is_empty() {
239 break 'unformat None;
240 }
241 #(#captures)*
242 if !__unfmt_byte_text.is_empty() {
243 break 'unformat None;
244 }
245 }
246 } else {
247 quote! { #(#captures)* }
248 };
249
250 TokenStream::from(quote! {
251 'unformat: {
252 use ::core::str::FromStr;
253 use ::unfmt::bstr::{ByteSlice, BStr};
254 let Some((__unfmt_left, mut __unfmt_byte_text)) = BStr::new(#text).split_once_str(#initial_part) else {
255 break 'unformat None;
256 };
257 #capture_block
258 Some((#(#capture_idents),*))
259 }
260 })
261}
262
263fn compile(pattern: &[u8], is_pattern_str: bool) -> (Vec<u8>, Vec<Capture>) {
264 let mut pattern = pattern.replace(b"{{", "\u{f8fd}");
265 pattern.reverse();
266 let mut pattern = pattern.replace(b"}}", "\u{f8fe}");
267 pattern.reverse();
268
269 let mut pattern_parts = pattern.split_str("{");
270
271 let initial_part = unsafe {
273 pattern_parts
274 .next()
275 .unwrap_unchecked()
276 .replace("\u{f8fd}", "{")
277 };
278
279 let mut current_index: u32 = 0;
280 let mut compiled_pattern = Vec::new();
281 for pattern_part in pattern_parts {
282 let (capture, text) = pattern_part
283 .split_once_str("}")
284 .expect("unmatched } in pattern");
285 let capture = capture
286 .to_str()
287 .expect("invalid UTF-8 in capture names")
288 .to_owned();
289 let mut text = text.replace("\u{f8fd}", b"{");
290 text.reverse();
291 let mut text = text.replace("\u{f8fe}", b"}");
292 text.reverse();
293 compiled_pattern.push(Capture::new(
294 &text,
295 &capture,
296 is_pattern_str,
297 &mut current_index,
298 ));
299 }
300
301 assert!(
302 compiled_pattern.windows(2).all(|parts| parts
303 .iter()
304 .any(|&Capture { ref text, .. }| !text.is_empty())),
305 "consecutive captures are not allowed"
306 );
307
308 (initial_part, compiled_pattern)
309}