format_like/lib.rs
1//! A macro for creating format-like macros
2//!
3//! Have you ever wanted to emulate the functionality of the `format!`
4//! family of macros, but with an output that is not a [`String`] or
5//! something built from a [`String`]?
6//!
7//! No?
8//!
//! Well, this might still be interesting for you.
10//!
11//! `format-like` aims to let _you_ decide how to interpret what is
12//! inside `{}` pairs, instead of calling something like
13//! `std::fmt::Display::fmt(&value)`.
14//!
//! Additionally, it lets you create 3 other types of bracket pairs:
16//! `()`, `[]` and `<>`, so you can interpret things in even more
17//! ways!
18//!
19//! Here's how it works:
20//!
21//! ```rust
22//! # #![feature(decl_macro)]
23//! use format_like::format_like;
24//!
25//! struct CommentedString(String, Vec<(usize, String)>);
26//!
27//! let comment = "there is an error in this word";
28//! let text = "text";
29//! let range = 0..usize::MAX;
30//!
31//! let commented_string = format_like!(
32//! parse_str,
33//! [('{', parse_interpolation, false), ('<', parse_comment, true)],
34//! CommentedString(String::new(), Vec::new()),
35//! "This is <comment>worng {}, and this is the end of the range {range.end}",
36//! text
37//! );
38//! # macro parse_str($value:expr, $str:literal) {{ $value }}
39//! # macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{ $value }}
40//! # macro parse_comment($value:expr, $added:expr, $modif:literal) {{ $value }}
41//! ```
42//!
43//! In this example, the `{}` should work as intended, but you also
44//! have access to `<>` interpolation. Inside `<>`, a comment will be
45//! added, with the associated `usize` being its position in the
46//! [`String`].
47//!
48//! This will all be done through the `parse_str`,
49//! `parse_interpolation` and `parse_comment` macros:
50//!
51//! ```rust
52//! #![feature(decl_macro)]
53//! macro parse_str($value:expr, $str:literal) {{
54//! let mut commented_string = $value;
55//! commented_string.0.push_str($str);
56//! commented_string
57//! }}
58//!
59//! macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{
60//! let CommentedString(string, comments) = $value;
61//! let string = format!(concat!("{}{", $modif, "}"), string, $added);
62//! CommentedString(string, comments)
63//! }}
64//!
65//! macro parse_comment($value:expr, $added:expr, $_modif:literal) {{
66//! let mut commented_string = $value;
67//! commented_string
68//! .1
69//! .push((commented_string.0.len(), $added.to_string()));
70//! commented_string
71//! }}
72//! ```
73//!
74//! The `parse_str` macro will be responsible for handling the non
75//! `{}` or `<>` parts of the literal `&str`. The `parse_comment` and
//! `parse_interpolation` macros will handle what's inside the `<>`
77//! and `{}` pairs, respectively.
78//!
79//! `parse_comment` and `parse_interpolation` must have three
80//! parameters, one for the `value` being modified (in this case, a
81//! `CommentedString`), one for the object being added (it's
82//! [`Display`] objects in this case, but it could be anything else),
//! and a modifier (`"?"`, `"#?"`, `".3"`, etc.), which is whatever comes
//! after a `":"` inside the pair.
85//!
86//! Now, as I mentioned earlier, this crate is meant for you to create
87//! _your own_ format like macros, so you should package all of this
88//! up into a single macro, like this:
89//!
90//! ```rust
91//! #![feature(decl_macro)]
92//! use format_like::format_like;
93//!
94//! #[derive(Debug, PartialEq)]
95//! struct CommentedString(String, Vec<(usize, String)>);
96//!
97//! let comment = "there is an error in this word";
98//! let text = "text";
99//! let range = 0..usize::MAX;
100//!
101//! let commented_string = commented_string!(
102//! "This is <comment>worng {}, and this is the end of the range {range.end}",
103//! text
104//! );
105//!
106//! assert_eq!(
107//! commented_string,
108//! CommentedString(
109//! "This is worng text, and this is the end of the range 18446744073709551615".to_string(),
110//! vec![(8, "there is an error in this word".to_string())]
111//! )
112//! );
113//!
114//! macro commented_string($($parts:tt)*) {
115//! format_like!(
116//! parse_str,
117//! [('{', parse_interpolation, false), ('<', parse_comment, true)],
118//! CommentedString(String::new(), Vec::new()),
119//! $($parts)*
120//! )
121//! }
122//!
123//! macro parse_str($value:expr, $str:literal) {{
124//! let mut commented_string = $value;
125//! commented_string.0.push_str($str);
126//! commented_string
127//! }}
128//!
129//! macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{
130//! let CommentedString(string, comments) = $value;
131//! let string = format!(concat!("{}{", $modif, "}"), string, $added);
132//! CommentedString(string, comments)
133//! }}
134//!
135//! macro parse_comment($value:expr, $added:expr, $_modif:literal) {{
136//! let mut commented_string = $value;
137//! commented_string.1.push((commented_string.0.len(), $added.to_string()));
138//! commented_string
139//! }}
140//! ```
141//!
142//! ## Forced inlining
143//!
144//! You might be wondering: What are the `false` and `true` in the
145//! second argument of [`format_like!`] used for?
146//!
//! Well, they determine whether an argument _must_ be inlined (i.e. be
148//! placed within the string like `{arg}`). This is useful when you
149//! want to limit the types of arguments that a macro should handle.
150//!
151//! As you might have seen earlier, [`format_like!`] accepts member
152//! access, like `{range.end}`. If you force a parameter to always be
153//! placed inline, that limits the types of tokens your macro must be
154//! able to handle, so you could rewrite the `parse_comment` macro to
155//! be:
156//!
157//! ```rust
158//! #![feature(decl_macro)]
159//! macro parse_comment($value:expr, $($identifier:ident).*, $modif:literal) {{
160//! // innards
161//! }}
162//! ```
163//!
164//! While this may not seem useful, it comes with two interesting
165//! abilities:
166//!
167//! 1 - If arguments must be inlined, you are allowed to leave the
168//! pair empty, like `<>`, and you can handle this situation
169//! differently if you want.
170//! 2 - By accessing the `$identifiers` directly, you can manipulate
171//! them in whichever way you want, heck, they may not even point to
172//! any actual variable in the code, and could be some sort of
173//! differently handled string literal.
174//!
175//! ## Motivation
176//!
177//! Even after reading all that, I wouldn't be surprised if you
178//! haven't found any particular use for this crate, and that's fine.
179//!
180//! But here is what was _my_ motivation for creating it:
181//!
182//! In my _in development_ text editor [Duat], there _used to be_ a
183//! `text` macro, which created a `Text` struct, which was essentially
184//! a [`String`] with formatting `Tag`s added on to it.
185//!
186//! It used to work like this:
187//!
188//! ```rust,ignore
189//! let text = text!("start" [RedColor] variable " " other_variable " ");
190//! ```
191//!
192//! This macro was a simple declarative macro, so while it was easy to
193//! implement, there were several drawbacks to its design:
194//!
195//! - It was ignored by rustfmt;
196//! - It didn't look like Rust;
197//! - tree-sitter failed at syntax highlighting it;
198//! - It didn't look like Rust;
199//! - Way too much space was occupied by simple things like `" "`;
200//! - It didn't look like Rust;
201//!
202//! And now I have replaced the old `text` macro with a new version,
203//! based on `format_like!`, which makes for a much cleaner design:
204//!
205//! ```rust,ignore
206//! let text = text!("start [RedColor]{variable} {other_variable} ");
207//! ```
208//!
209//! [`Display`]: std::fmt::Display
210//! [Duat]: https://github.com/AhoyISki/duat
211use std::ops::Range;
212
213use proc_macro::TokenStream;
214use proc_macro2::Span;
215use quote::{format_ident, quote};
216use syn::{
217 Expr, Ident, LitBool, LitChar, LitStr, Path, Token, bracketed, parenthesized,
218 parse::{Parse, ParseBuffer},
219 parse_macro_input,
220 spanned::Spanned,
221};
222
223/// A macro for creating format-like macros
224///
225/// ```rust
226/// #![feature(decl_macro)]
227/// use format_like::format_like;
228///
229/// #[derive(Debug, PartialEq)]
230/// struct CommentedString(String, Vec<(usize, String)>);
231///
232/// let comment = "there is an error in this word";
233/// let text = "text";
234/// let range = 0..usize::MAX;
235///
236/// let commented_string = commented_string!(
237/// "This is <comment>worng {}, and this is the end of the range {range.end}",
238/// text
239/// );
240///
241/// assert_eq!(
242/// commented_string,
243/// CommentedString(
244/// "This is worng text, and this is the end of the range 18446744073709551615".to_string(),
245/// vec![(8, "there is an error in this word".to_string())]
246/// )
247/// );
248///
249/// macro commented_string($($parts:tt)*) {
250/// format_like!(
251/// parse_str,
252/// [('{', parse_interpolation, false), ('<', parse_comment, true)],
253/// CommentedString(String::new(), Vec::new()),
254/// $($parts)*
255/// )
256/// }
257///
258/// macro parse_str($value:expr, $str:literal) {{
259/// let mut commented_string = $value;
260/// commented_string.0.push_str($str);
261/// commented_string
262/// }}
263///
264/// macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{
265/// let CommentedString(string, comments) = $value;
266/// let string = format!(concat!("{}{", $modif, "}"), string, $added);
267/// CommentedString(string, comments)
268/// }}
269///
270/// macro parse_comment($value:expr, $added:expr, $_modif:literal) {{
271/// let mut commented_string = $value;
272/// commented_string.1.push((commented_string.0.len(), $added.to_string()));
273/// commented_string
274/// }}
275/// ```
276#[proc_macro]
277pub fn format_like(input: TokenStream) -> TokenStream {
278 let fmt_like = parse_macro_input!(input as FormatLike);
279 let lit_str = &fmt_like.str;
280 let str = lit_str.value();
281 let arg_parsers = &fmt_like.arg_parsers;
282
283 let mut args = Vec::new();
284
285 let mut arg: Option<CurrentArg> = None;
286 let mut unescaped_rhs: Option<(usize, char)> = None;
287 let mut push_new_ident = true;
288 let mut positional_needed = 0;
289
290 let str_span = |r: Range<usize>| lit_str.token().subspan(r.start + 1..r.end + 1).unwrap();
291
292 for (i, char) in str.char_indices() {
293 if let Some((j, p, mut idents, mut modif)) = arg.take() {
294 let (lhs, rhs) = &arg_parsers[p].delims;
295 if char == *rhs {
296 let modif = match modif {
297 Some(range) => {
298 let str =
299 unsafe { str::from_utf8_unchecked(&str.as_bytes()[range.clone()]) };
300 let str = LitStr::new(str, str_span(range));
301
302 quote! { #str }
303 }
304 None => quote! { "" },
305 };
306
307 if idents.is_empty() {
308 if arg_parsers[p].inline_only {
309 args.push(Arg::Inlined(p, Vec::new(), modif));
310 } else {
311 positional_needed += 1;
312 args.push(Arg::Positional(p, j..i + 1, modif));
313 }
314 } else if push_new_ident {
315 return compile_err(
316 str_span(i - 1..i),
317 "invalid format string: field access expected an identifier",
318 );
319 } else {
320 let idents = idents
321 .into_iter()
322 .map(|range| {
323 let mut ident = format_ident!("{}", unsafe {
324 str::from_utf8_unchecked(&str.as_bytes()[range.clone()])
325 });
326 ident.set_span(str_span(range));
327 ident
328 })
329 .collect();
330
331 args.push(Arg::Inlined(p, idents, modif));
332 }
333
334 continue;
335 } else if char == lhs.value() && idents.is_empty() {
336 // If arg was empty, that means the delimiter was repeated, so escape
337 // it.
338 extend_str_arg(&mut args, i);
339 continue;
340 }
341
342 // We might have mismatched delimiters
343 if arg_parsers
344 .iter()
345 .any(|ap| char == ap.delims.0.value() || char == ap.delims.1)
346 {
347 let mut err = syn::Error::new(
348 str_span(i..i + 1),
349 "invalid format string: wrong match for delimiter",
350 );
351 err.combine(syn::Error::new(
352 str_span(j..j + 1),
353 format!("from this delimiter, expected {rhs}"),
354 ));
355 let compile_err = err.into_compile_error();
356
357 // Since this should return an Expr, we need to brace it.
358 let err = quote! {{
359 #compile_err
360 }};
361
362 return err.into();
363 } else if char.is_alphanumeric() || char == '_' || modif.is_some() {
364 if let Some(modif) = &mut modif {
365 modif.end = i + 1;
366 } else if let Some(last) = idents.last_mut()
367 && !push_new_ident
368 {
369 last.end = i + 1;
370 } else {
371 idents.push(i..i + 1);
372 push_new_ident = false;
373 }
374 } else if char == '.' {
375 if let Some(modif) = &mut modif {
376 modif.end = i + 1;
377 } else if push_new_ident {
378 // Can't start an identifier list with '.' or put multiple '.'s in a
379 // row.
380 return compile_err(
381 str_span(i..i + 1),
382 "invalid format string: unexpected '.' here",
383 );
384 } else {
385 push_new_ident = true;
386 }
387 } else if char == ':' {
388 if let Some(modif) = &mut modif {
389 modif.end = i + 1;
390 } else {
391 modif = Some(i + 1..i + 1);
392 }
393 } else {
394 return compile_err(
395 str_span(i..i + 1),
396 format!("invalid format string: unexpected {char} here"),
397 );
398 }
399
400 arg = Some((j, p, idents, modif));
401 } else if let Some(p) = arg_parsers
402 .iter()
403 .position(|ap| char == ap.delims.0.value() || char == ap.delims.1)
404 {
405 // If the char is a left delimiter, begin an argument.
406 // If it is a right delimiter, handle dangling right parameter
407 // scenarios.
408 if char == arg_parsers[p].delims.0.value() {
409 push_new_ident = true;
410 arg = Some((i, p, Vec::new(), None));
411 } else if let Some((j, unescaped)) = unescaped_rhs {
412 // Double delimiters are escaped.
413 if char == unescaped {
414 unescaped_rhs = None;
415 extend_str_arg(&mut args, i);
416 } else {
417 return compile_err(
418 str_span(j..j + 1),
419 format!("invalid format string: unmatched {unescaped} found"),
420 );
421 }
422 } else {
423 unescaped_rhs = Some((i, char));
424 }
425 } else if let Some((j, unescaped)) = unescaped_rhs {
426 return compile_err(
427 str_span(j..j + 1),
428 format!("invalid format string: unmatched {unescaped} found"),
429 );
430 } else {
431 extend_str_arg(&mut args, i);
432 }
433 }
434
435 if let Some((i, unescaped)) = unescaped_rhs {
436 return compile_err(
437 str_span(i..i + 1),
438 format!("invalid format string: unmatched {unescaped} found"),
439 );
440 }
441
442 let expr = fmt_like.initial;
443 let mut token_stream = quote! { #expr };
444
445 let positional_provided = fmt_like.exprs.len();
446 let mut exprs = fmt_like.exprs.into_iter();
447
448 for arg in args {
449 token_stream = match arg {
450 Arg::Str(range) => {
451 let str = unsafe { str::from_utf8_unchecked(&str.as_bytes()[range.clone()]) };
452 let str = LitStr::new(str, str_span(range));
453 let parser = &fmt_like.str_parser;
454
455 quote! {
456 #parser!(#token_stream, #str)
457 }
458 }
459 Arg::Positional(p, range, modif) => {
460 if let Some(expr) = exprs.next() {
461 let parser = &fmt_like.arg_parsers[p].parser;
462
463 quote! {
464 #parser!(#token_stream, #expr, #modif)
465 }
466 } else {
467 let npl = if positional_needed == 1 { "" } else { "s" };
468 let pverb = if positional_provided == 1 {
469 "is"
470 } else {
471 "are"
472 };
473 let ppl = if positional_provided == 1 { "" } else { "s" };
474
475 return compile_err(
476 str_span(range),
477 format!(
478 "{positional_needed} positional argument{npl} in format string, but there {pverb} {positional_provided} argument{ppl}"
479 ),
480 );
481 }
482 }
483 Arg::Inlined(p, idents, modif) => {
484 let parser = &fmt_like.arg_parsers[p].parser;
485
486 quote! {
487 #parser!(#token_stream, #(#idents).*, #modif)
488 }
489 }
490 }
491 }
492
493 // There should be no positional arguments left.
494 if let Some(expr) = exprs.next() {
495 return compile_err(expr.span(), "argument never used");
496 }
497
498 token_stream.into()
499}
500
501struct ArgParser {
502 delims: (LitChar, char),
503 parser: Path,
504 inline_only: bool,
505}
506
507impl ArgParser {
508 fn new(input: &ParseBuffer) -> syn::Result<Self> {
509 const VALID_DELIMS: &[[char; 2]] = &[['{', '}'], ['(', ')'], ['[', ']'], ['<', '>']];
510 let elems;
511 parenthesized!(elems in input);
512
513 let delims = {
514 let left: LitChar = elems.parse()?;
515
516 if let Some([_, right]) = VALID_DELIMS.iter().find(|[rhs, _]| left.value() == *rhs) {
517 (left, *right)
518 } else {
519 return Err(syn::Error::new_spanned(left, "is not a valid delimiter"));
520 }
521 };
522
523 elems.parse::<Token![,]>()?;
524 let parser = elems.parse()?;
525 elems.parse::<Token![,]>()?;
526 let inline_only = elems.parse::<LitBool>()?.value();
527
528 Ok(Self { delims, parser, inline_only })
529 }
530}
531
532struct FormatLike {
533 str_parser: Path,
534 arg_parsers: Vec<ArgParser>,
535 initial: Expr,
536 str: LitStr,
537 exprs: Vec<Expr>,
538}
539
540impl Parse for FormatLike {
541 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
542 let str_parser = input.parse()?;
543 input.parse::<Token![,]>()?;
544
545 let arg_parsers: Vec<ArgParser> = {
546 let arg_parsers;
547 bracketed!(arg_parsers in input);
548 arg_parsers
549 .parse_terminated(ArgParser::new, Token![,])?
550 .into_iter()
551 .collect()
552 };
553
554 if let Some((lhs, rhs)) = arg_parsers.iter().enumerate().find_map(|(i, lhs)| {
555 arg_parsers.iter().enumerate().find_map(|(j, rhs)| {
556 (i != j)
557 .then(|| (rhs.delims.1 == lhs.delims.1).then_some((lhs, rhs)))
558 .flatten()
559 })
560 }) {
561 let l_err = syn::Error::new_spanned(&lhs.delims.0, "this delimiter");
562 let mut r_err = syn::Error::new_spanned(&rhs.delims.0, "is the same as this");
563 r_err.combine(l_err);
564 return Err(r_err);
565 }
566 input.parse::<Token![,]>()?;
567
568 let initial = input.parse()?;
569 input.parse::<Token![,]>()?;
570
571 let str = input.parse()?;
572
573 let exprs = if !input.is_empty() {
574 input.parse::<Token![,]>()?;
575 input
576 .parse_terminated(Expr::parse, Token![,])?
577 .into_iter()
578 .collect()
579 } else {
580 Vec::new()
581 };
582
583 Ok(Self {
584 str_parser,
585 arg_parsers,
586 initial,
587 str,
588 exprs,
589 })
590 }
591}
592
593enum Arg {
594 Str(Range<usize>),
595 Positional(usize, Range<usize>, proc_macro2::TokenStream),
596 Inlined(usize, Vec<Ident>, proc_macro2::TokenStream),
597}
598
599fn extend_str_arg(args: &mut Vec<Arg>, start_of_char: usize) {
600 if let Some(Arg::Str(range)) = args.last_mut() {
601 range.end = start_of_char + 1;
602 } else {
603 args.push(Arg::Str(start_of_char..start_of_char + 1))
604 }
605}
606
607fn compile_err(span: Span, msg: impl std::fmt::Display) -> TokenStream {
608 let compile_err = syn::Error::new(span, msg).into_compile_error();
609
610 let err = quote! {{
611 #compile_err
612 }};
613
614 err.into()
615}
616
/// The argument currently being scanned inside a delimiter pair:
///
/// 0. byte offset of the opening delimiter;
/// 1. index of the matching entry in the parser list;
/// 2. byte ranges of the `.`-separated identifiers read so far;
/// 3. byte range of the modifier, once a `:` has been seen.
type CurrentArg = (
    usize,
    usize,
    Vec<Range<usize>>,
    Option<Range<usize>>,
);