format_like/
lib.rs

1//! A macro for creating format-like macros
2//!
3//! Have you ever wanted to emulate the functionality of the `format!`
4//! family of macros, but with an output that is not a [`String`] or
5//! something built from a [`String`]?
6//!
7//! No?
8//!
//! Well, this might still be interesting for you.
10//!
11//! `format-like` aims to let _you_ decide how to interpret what is
12//! inside `{}` pairs, instead of calling something like
13//! `std::fmt::Display::fmt(&value)`.
14//!
//! Additionally, it lets you create 3 other types of bracket pairs:
16//! `()`, `[]` and `<>`, so you can interpret things in even more
17//! ways!
18//!
19//! Here's how it works:
20//!
21//! ```rust
22//! # #![feature(decl_macro)]
23//! use format_like::format_like;
24//!
25//! struct CommentedString(String, Vec<(usize, String)>);
26//!
27//! let comment = "there is an error in this word";
28//! let text = "text";
29//! let range = 0..usize::MAX;
30//!
31//! let commented_string = format_like!(
32//!     parse_str,
33//!     [('{', parse_interpolation, false), ('<', parse_comment, true)],
34//!     CommentedString(String::new(), Vec::new()),
35//!     "This is <comment>worng {}, and this is the end of the range {range.end}",
36//!     text
37//! );
38//! # macro parse_str($value:expr, $str:literal) {{ $value }}
39//! # macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{ $value }}
40//! # macro parse_comment($value:expr, $added:expr, $modif:literal) {{ $value }}
41//! ```
42//!
43//! In this example, the `{}` should work as intended, but you also
44//! have access to `<>` interpolation. Inside `<>`, a comment will be
45//! added, with the associated `usize` being its position in the
46//! [`String`].
47//!
48//! This will all be done through the `parse_str`,
49//! `parse_interpolation` and `parse_comment` macros:
50//!
51//! ```rust
52//! #![feature(decl_macro)]
53//! macro parse_str($value:expr, $str:literal) {{
54//!     let mut commented_string = $value;
55//!     commented_string.0.push_str($str);
56//!     commented_string
57//! }}
58//!
59//! macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{
60//!     let CommentedString(string, comments) = $value;
61//!     let string = format!(concat!("{}{", $modif, "}"), string, $added);
62//!     CommentedString(string, comments)
63//! }}
64//!
65//! macro parse_comment($value:expr, $added:expr, $_modif:literal) {{
66//!     let mut commented_string = $value;
67//!     commented_string
68//!         .1
69//!         .push((commented_string.0.len(), $added.to_string()));
70//!     commented_string
71//! }}
72//! ```
73//!
//! The `parse_str` macro will be responsible for handling the non
//! `{}` or `<>` parts of the literal `&str`. The `parse_comment` and
//! `parse_interpolation` macros will handle what's inside the `<>`
//! and `{}` pairs, respectively.
78//!
79//! `parse_comment` and `parse_interpolation` must have three
80//! parameters, one for the `value` being modified (in this case, a
81//! `CommentedString`), one for the object being added (it's
82//! [`Display`] objects in this case, but it could be anything else),
//! and a modifier (`"?"`, `"#?"`, `".3"`, etc.), which is whatever comes
//! after a `":"` inside the pair (or `""` if there is none).
85//!
86//! Now, as I mentioned earlier, this crate is meant for you to create
87//! _your own_ format like macros, so you should package all of this
88//! up into a single macro, like this:
89//!
90//! ```rust
91//! #![feature(decl_macro)]
92//! use format_like::format_like;
93//!
94//! #[derive(Debug, PartialEq)]
95//! struct CommentedString(String, Vec<(usize, String)>);
96//!
97//! let comment = "there is an error in this word";
98//! let text = "text";
99//! let range = 0..usize::MAX;
100//!
101//! let commented_string = commented_string!(
102//!     "This is <comment>worng {}, and this is the end of the range {range.end}",
103//!     text
104//! );
105//!
106//! assert_eq!(
107//!     commented_string,
108//!     CommentedString(
109//!         "This is worng text, and this is the end of the range 18446744073709551615".to_string(),
110//!         vec![(8, "there is an error in this word".to_string())]
111//!     )
112//! );
113//!
114//! macro commented_string($($parts:tt)*) {
115//!     format_like!(
116//!         parse_str,
117//!         [('{', parse_interpolation, false), ('<', parse_comment, true)],
118//!         CommentedString(String::new(), Vec::new()),
119//!         $($parts)*
120//!     )
121//! }
122//!
123//! macro parse_str($value:expr, $str:literal) {{
124//!     let mut commented_string = $value;
125//!     commented_string.0.push_str($str);
126//!     commented_string
127//! }}
128//!
129//! macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{
130//!     let CommentedString(string, comments) = $value;
131//!     let string = format!(concat!("{}{", $modif, "}"), string, $added);
132//!     CommentedString(string, comments)
133//! }}
134//!
135//! macro parse_comment($value:expr, $added:expr, $_modif:literal) {{
136//!     let mut commented_string = $value;
137//!     commented_string.1.push((commented_string.0.len(), $added.to_string()));
138//!     commented_string
139//! }}
140//! ```
141//!
142//! ## Forced inlining
143//!
144//! You might be wondering: What are the `false` and `true` in the
145//! second argument of [`format_like!`] used for?
146//!
//! Well, they determine whether an argument _must_ be inlined (i.e. be
148//! placed within the string like `{arg}`). This is useful when you
149//! want to limit the types of arguments that a macro should handle.
150//!
151//! As you might have seen earlier, [`format_like!`] accepts member
152//! access, like `{range.end}`. If you force a parameter to always be
153//! placed inline, that limits the types of tokens your macro must be
154//! able to handle, so you could rewrite the `parse_comment` macro to
155//! be:
156//!
157//! ```rust
158//! #![feature(decl_macro)]
159//! macro parse_comment($value:expr, $($identifier:ident).*, $modif:literal) {{
160//!     // innards
161//! }}
162//! ```
163//!
164//! While this may not seem useful, it comes with two interesting
165//! abilities:
166//!
//! 1. If arguments must be inlined, you are allowed to leave the
//!    pair empty, like `<>`, and you can handle this situation
//!    differently if you want.
//! 2. By accessing the `$identifier`s directly, you can manipulate
//!    them in whichever way you want; heck, they may not even point to
//!    any actual variable in the code, and could be some sort of
//!    differently handled string literal.
174//!
175//! ## Motivation
176//!
177//! Even after reading all that, I wouldn't be surprised if you
178//! haven't found any particular use for this crate, and that's fine.
179//!
180//! But here is what was _my_ motivation for creating it:
181//!
182//! In my _in development_ text editor [Duat], there _used to be_ a
183//! `text` macro, which created a `Text` struct, which was essentially
184//! a [`String`] with formatting `Tag`s added on to it.
185//!
186//! It used to work like this:
187//!
188//! ```rust,ignore
189//! let text = text!("start" [RedColor] variable " " other_variable " ");
190//! ```
191//!
192//! This macro was a simple declarative macro, so while it was easy to
193//! implement, there were several drawbacks to its design:
194//!
195//! - It was ignored by rustfmt;
196//! - It didn't look like Rust;
197//! - tree-sitter failed at syntax highlighting it;
198//! - It didn't look like Rust;
199//! - Way too much space was occupied by simple things like `" "`;
200//! - It didn't look like Rust;
201//!
202//! And now I have replaced the old `text` macro with a new version,
203//! based on `format_like!`, which makes for a much cleaner design:
204//!
205//! ```rust,ignore
206//! let text = text!("start [RedColor]{variable} {other_variable} ");
207//! ```
208//!
209//! [`Display`]: std::fmt::Display
210//! [Duat]: https://github.com/AhoyISki/duat
211use std::ops::Range;
212
213use proc_macro::TokenStream;
214use proc_macro2::Span;
215use quote::{format_ident, quote};
216use syn::{
217    Expr, Ident, LitBool, LitChar, LitStr, Path, Token, bracketed, parenthesized,
218    parse::{Parse, ParseBuffer},
219    parse_macro_input,
220    spanned::Spanned,
221};
222
223/// A macro for creating format-like macros
224///
225/// ```rust
226/// #![feature(decl_macro)]
227/// use format_like::format_like;
228///
229/// #[derive(Debug, PartialEq)]
230/// struct CommentedString(String, Vec<(usize, String)>);
231///
232/// let comment = "there is an error in this word";
233/// let text = "text";
234/// let range = 0..usize::MAX;
235///
236/// let commented_string = commented_string!(
237///     "This is <comment>worng {}, and this is the end of the range {range.end}",
238///     text
239/// );
240///
241/// assert_eq!(
242///     commented_string,
243///     CommentedString(
244///         "This is worng text, and this is the end of the range 18446744073709551615".to_string(),
245///         vec![(8, "there is an error in this word".to_string())]
246///     )
247/// );
248///
249/// macro commented_string($($parts:tt)*) {
250///     format_like!(
251///         parse_str,
252///         [('{', parse_interpolation, false), ('<', parse_comment, true)],
253///         CommentedString(String::new(), Vec::new()),
254///         $($parts)*
255///     )
256/// }
257///
258/// macro parse_str($value:expr, $str:literal) {{
259///     let mut commented_string = $value;
260///     commented_string.0.push_str($str);
261///     commented_string
262/// }}
263///
264/// macro parse_interpolation($value:expr, $added:expr, $modif:literal) {{
265///     let CommentedString(string, comments) = $value;
266///     let string = format!(concat!("{}{", $modif, "}"), string, $added);
267///     CommentedString(string, comments)
268/// }}
269///
270/// macro parse_comment($value:expr, $added:expr, $_modif:literal) {{
271///     let mut commented_string = $value;
272///     commented_string.1.push((commented_string.0.len(), $added.to_string()));
273///     commented_string
274/// }}
275/// ```
276#[proc_macro]
277pub fn format_like(input: TokenStream) -> TokenStream {
278    let fmt_like = parse_macro_input!(input as FormatLike);
279    let lit_str = &fmt_like.str;
280    let str = lit_str.value();
281    let arg_parsers = &fmt_like.arg_parsers;
282
283    let mut args = Vec::new();
284
285    let mut arg: Option<CurrentArg> = None;
286    let mut unescaped_rhs: Option<(usize, char)> = None;
287    let mut push_new_ident = true;
288    let mut positional_needed = 0;
289
290    let str_span = |r: Range<usize>| lit_str.token().subspan(r.start + 1..r.end + 1).unwrap();
291
292    for (i, char) in str.char_indices() {
293        if let Some((j, p, mut idents, mut modif)) = arg.take() {
294            let (lhs, rhs) = &arg_parsers[p].delims;
295            if char == *rhs {
296                let modif = match modif {
297                    Some(range) => {
298                        let str =
299                            unsafe { str::from_utf8_unchecked(&str.as_bytes()[range.clone()]) };
300                        let str = LitStr::new(str, str_span(range));
301
302                        quote! { #str }
303                    }
304                    None => quote! { "" },
305                };
306
307                if idents.is_empty() {
308                    if arg_parsers[p].inline_only {
309                        args.push(Arg::Inlined(p, Vec::new(), modif));
310                    } else {
311                        positional_needed += 1;
312                        args.push(Arg::Positional(p, j..i + 1, modif));
313                    }
314                } else if push_new_ident {
315                    return compile_err(
316                        str_span(i - 1..i),
317                        "invalid format string: field access expected an identifier",
318                    );
319                } else {
320                    let idents = idents
321                        .into_iter()
322                        .map(|range| {
323                            let mut ident = format_ident!("{}", unsafe {
324                                str::from_utf8_unchecked(&str.as_bytes()[range.clone()])
325                            });
326                            ident.set_span(str_span(range));
327                            ident
328                        })
329                        .collect();
330
331                    args.push(Arg::Inlined(p, idents, modif));
332                }
333
334                continue;
335            } else if char == lhs.value() && idents.is_empty() {
336                // If arg was empty, that means the delimiter was repeated, so escape
337                // it.
338                extend_str_arg(&mut args, i);
339                continue;
340            }
341
342            // We might have mismatched delimiters
343            if arg_parsers
344                .iter()
345                .any(|ap| char == ap.delims.0.value() || char == ap.delims.1)
346            {
347                let mut err = syn::Error::new(
348                    str_span(i..i + 1),
349                    "invalid format string: wrong match for delimiter",
350                );
351                err.combine(syn::Error::new(
352                    str_span(j..j + 1),
353                    format!("from this delimiter, expected {rhs}"),
354                ));
355                let compile_err = err.into_compile_error();
356
357                // Since this should return an Expr, we need to brace it.
358                let err = quote! {{
359                    #compile_err
360                }};
361
362                return err.into();
363            } else if char.is_alphanumeric() || char == '_' || modif.is_some() {
364                if let Some(modif) = &mut modif {
365                    modif.end = i + 1;
366                } else if let Some(last) = idents.last_mut()
367                    && !push_new_ident
368                {
369                    last.end = i + 1;
370                } else {
371                    idents.push(i..i + 1);
372                    push_new_ident = false;
373                }
374            } else if char == '.' {
375                if let Some(modif) = &mut modif {
376                    modif.end = i + 1;
377                } else if push_new_ident {
378                    // Can't start an identifier list with '.' or put multiple '.'s in a
379                    // row.
380                    return compile_err(
381                        str_span(i..i + 1),
382                        "invalid format string: unexpected '.' here",
383                    );
384                } else {
385                    push_new_ident = true;
386                }
387            } else if char == ':' {
388                if let Some(modif) = &mut modif {
389                    modif.end = i + 1;
390                } else {
391                    modif = Some(i + 1..i + 1);
392                }
393            } else {
394                return compile_err(
395                    str_span(i..i + 1),
396                    format!("invalid format string: unexpected {char} here"),
397                );
398            }
399
400            arg = Some((j, p, idents, modif));
401        } else if let Some(p) = arg_parsers
402            .iter()
403            .position(|ap| char == ap.delims.0.value() || char == ap.delims.1)
404        {
405            // If the char is a left delimiter, begin an argument.
406            // If it is a right delimiter, handle dangling right parameter
407            // scenarios.
408            if char == arg_parsers[p].delims.0.value() {
409                push_new_ident = true;
410                arg = Some((i, p, Vec::new(), None));
411            } else if let Some((j, unescaped)) = unescaped_rhs {
412                // Double delimiters are escaped.
413                if char == unescaped {
414                    unescaped_rhs = None;
415                    extend_str_arg(&mut args, i);
416                } else {
417                    return compile_err(
418                        str_span(j..j + 1),
419                        format!("invalid format string: unmatched {unescaped} found"),
420                    );
421                }
422            } else {
423                unescaped_rhs = Some((i, char));
424            }
425        } else if let Some((j, unescaped)) = unescaped_rhs {
426            return compile_err(
427                str_span(j..j + 1),
428                format!("invalid format string: unmatched {unescaped} found"),
429            );
430        } else {
431            extend_str_arg(&mut args, i);
432        }
433    }
434
435    if let Some((i, unescaped)) = unescaped_rhs {
436        return compile_err(
437            str_span(i..i + 1),
438            format!("invalid format string: unmatched {unescaped} found"),
439        );
440    }
441
442    let expr = fmt_like.initial;
443    let mut token_stream = quote! { #expr };
444
445    let positional_provided = fmt_like.exprs.len();
446    let mut exprs = fmt_like.exprs.into_iter();
447
448    for arg in args {
449        token_stream = match arg {
450            Arg::Str(range) => {
451                let str = unsafe { str::from_utf8_unchecked(&str.as_bytes()[range.clone()]) };
452                let str = LitStr::new(str, str_span(range));
453                let parser = &fmt_like.str_parser;
454
455                quote! {
456                    #parser!(#token_stream, #str)
457                }
458            }
459            Arg::Positional(p, range, modif) => {
460                if let Some(expr) = exprs.next() {
461                    let parser = &fmt_like.arg_parsers[p].parser;
462
463                    quote! {
464                        #parser!(#token_stream, #expr, #modif)
465                    }
466                } else {
467                    let npl = if positional_needed == 1 { "" } else { "s" };
468                    let pverb = if positional_provided == 1 {
469                        "is"
470                    } else {
471                        "are"
472                    };
473                    let ppl = if positional_provided == 1 { "" } else { "s" };
474
475                    return compile_err(
476                        str_span(range),
477                        format!(
478                            "{positional_needed} positional argument{npl} in format string, but there {pverb} {positional_provided} argument{ppl}"
479                        ),
480                    );
481                }
482            }
483            Arg::Inlined(p, idents, modif) => {
484                let parser = &fmt_like.arg_parsers[p].parser;
485
486                quote! {
487                    #parser!(#token_stream, #(#idents).*, #modif)
488                }
489            }
490        }
491    }
492
493    // There should be no positional arguments left.
494    if let Some(expr) = exprs.next() {
495        return compile_err(expr.span(), "argument never used");
496    }
497
498    token_stream.into()
499}
500
501struct ArgParser {
502    delims: (LitChar, char),
503    parser: Path,
504    inline_only: bool,
505}
506
507impl ArgParser {
508    fn new(input: &ParseBuffer) -> syn::Result<Self> {
509        const VALID_DELIMS: &[[char; 2]] = &[['{', '}'], ['(', ')'], ['[', ']'], ['<', '>']];
510        let elems;
511        parenthesized!(elems in input);
512
513        let delims = {
514            let left: LitChar = elems.parse()?;
515
516            if let Some([_, right]) = VALID_DELIMS.iter().find(|[rhs, _]| left.value() == *rhs) {
517                (left, *right)
518            } else {
519                return Err(syn::Error::new_spanned(left, "is not a valid delimiter"));
520            }
521        };
522
523        elems.parse::<Token![,]>()?;
524        let parser = elems.parse()?;
525        elems.parse::<Token![,]>()?;
526        let inline_only = elems.parse::<LitBool>()?.value();
527
528        Ok(Self { delims, parser, inline_only })
529    }
530}
531
532struct FormatLike {
533    str_parser: Path,
534    arg_parsers: Vec<ArgParser>,
535    initial: Expr,
536    str: LitStr,
537    exprs: Vec<Expr>,
538}
539
540impl Parse for FormatLike {
541    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
542        let str_parser = input.parse()?;
543        input.parse::<Token![,]>()?;
544
545        let arg_parsers: Vec<ArgParser> = {
546            let arg_parsers;
547            bracketed!(arg_parsers in input);
548            arg_parsers
549                .parse_terminated(ArgParser::new, Token![,])?
550                .into_iter()
551                .collect()
552        };
553
554        if let Some((lhs, rhs)) = arg_parsers.iter().enumerate().find_map(|(i, lhs)| {
555            arg_parsers.iter().enumerate().find_map(|(j, rhs)| {
556                (i != j)
557                    .then(|| (rhs.delims.1 == lhs.delims.1).then_some((lhs, rhs)))
558                    .flatten()
559            })
560        }) {
561            let l_err = syn::Error::new_spanned(&lhs.delims.0, "this delimiter");
562            let mut r_err = syn::Error::new_spanned(&rhs.delims.0, "is the same as this");
563            r_err.combine(l_err);
564            return Err(r_err);
565        }
566        input.parse::<Token![,]>()?;
567
568        let initial = input.parse()?;
569        input.parse::<Token![,]>()?;
570
571        let str = input.parse()?;
572
573        let exprs = if !input.is_empty() {
574            input.parse::<Token![,]>()?;
575            input
576                .parse_terminated(Expr::parse, Token![,])?
577                .into_iter()
578                .collect()
579        } else {
580            Vec::new()
581        };
582
583        Ok(Self {
584            str_parser,
585            arg_parsers,
586            initial,
587            str,
588            exprs,
589        })
590    }
591}
592
/// A piece of the format string, stored in the order it appears.
enum Arg {
    /// Plain text: the byte range it occupies in the string's value.
    Str(Range<usize>),
    /// An empty pair, to be filled by the next positional argument:
    /// index of its parser, byte range of the whole pair (delimiters
    /// included), and the tokens of the modifier string literal.
    Positional(usize, Range<usize>, proc_macro2::TokenStream),
    /// An argument written inside of the pair: index of its parser,
    /// the identifiers of the `.` separated access (empty for an empty
    /// pair of an inline only parser), and the tokens of the modifier
    /// string literal.
    Inlined(usize, Vec<Ident>, proc_macro2::TokenStream),
}
598
599fn extend_str_arg(args: &mut Vec<Arg>, start_of_char: usize) {
600    if let Some(Arg::Str(range)) = args.last_mut() {
601        range.end = start_of_char + 1;
602    } else {
603        args.push(Arg::Str(start_of_char..start_of_char + 1))
604    }
605}
606
607fn compile_err(span: Span, msg: impl std::fmt::Display) -> TokenStream {
608    let compile_err = syn::Error::new(span, msg).into_compile_error();
609
610    let err = quote! {{
611        #compile_err
612    }};
613
614    err.into()
615}
616
/// State of the argument currently being read: byte index of its
/// opening delimiter, index of its parser, byte ranges of the
/// identifiers found so far, and byte range of the modifier (`None`
/// until a `:` is found).
type CurrentArg = (usize, usize, Vec<Range<usize>>, Option<Range<usize>>);