format_like/
lib.rs

1//! A macro for creating format-like macros
2//!
3//! Have you ever wanted to emulate the functionality of the `format!`
4//! family of macros, but with an output that is not a [`String`] or
5//! something built from a [`String`]?
6//!
7//! No?
8//!
9//! Well, this might still be interesting for you.
10//!
11//! `format-like` aims to let _you_ decide how to interpret what is
12//! inside `{}` pairs, instead of calling something like
13//! `std::fmt::Display::fmt(&value)`.
14//!
15//! Additionally, it lets you create 3 other types of bracket pairs:
16//! `()`, `[]` and `<>`, so you can interpret things in even more
17//! ways! This does of course come with the regular escaping that the
18//! [`format!`] macro does, so `{{` is escaped to just `{`, the same
19//! being the case for the other delimiters as well.
20//!
21//! Here's how it works:
22//!
23//! ```rust
24//! # #![feature(decl_macro)]
25//! use format_like::format_like;
26//!
27//! struct CommentedString(String, Vec<(usize, String)>);
28//!
29//! let comment = "there is an error in this word";
30//! let text = "text";
31//! let range = 0..usize::MAX;
32//!
33//! let commented_string = format_like!(
34//!     parse_str,
35//!     [('{', parse_interpolation, false), ('<', parse_comment, true)],
36//!     CommentedString(String::new(), Vec::new()),
37//!     "This is <comment>regluar {}, interpolated and commented {range.end}",
38//!     text
39//! );
40//! # macro parse_str($value:expr, $str:literal) {{ $value }}
41//! # macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{ $value }}
42//! # macro parse_comment($value:expr, $modif:literal, $added:expr) {{ $value }}
43//! ```
44//!
45//! In this example, the `{}` should work as intended, but you also
46//! have access to `<>` interpolation. Inside `<>`, a comment will be
47//! added, with the associated `usize` being its position in the
48//! [`String`].
49//!
50//! This will all be done through the `parse_str`,
51//! `parse_interpolation` and `parse_comment` macros:
52//!
53//! ```rust
54//! #![feature(decl_macro)]
55//! macro parse_str($value:expr, $str:literal) {{
56//!     let mut commented_string = $value;
57//!     commented_string.0.push_str($str);
58//!     commented_string
59//! }}
60//!
61//! macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{
62//!     let CommentedString(string, comments) = $value;
63//!     let string = format!(concat!("{}{", $modif, "}"), string, $added);
64//!     CommentedString(string, comments)
65//! }}
66//!
67//! macro parse_comment($value:expr, $_modif:literal, $added:expr) {{
68//!     let mut commented_string = $value;
69//!     commented_string
70//!         .1
71//!         .push((commented_string.0.len(), $added.to_string()));
72//!     commented_string
73//! }}
74//! ```
75//!
76//! The `parse_str` macro will be responsible for handling the non
77//! `{}` or `<>` parts of the literal `&str`. The `parse_comment` and
78//! `parse_interpolation` macros will handle what's inside the `<>`
79//! and `{}` pairs, respectively.
80//!
81//! `parse_comment` and `parse_interpolation` must have three
82//! parameters, one for the `value` being modified (in this case, a
83//! `CommentedString`), one for the modifier (`"?"`, `"#?"`, `".3"`, etc.),
84//! which might come after a `":"` in the pair, and one for the object
85//! being added (these are [`Display`] objects in this case, but they
86//! could be anything else).
87//!
88//! Now, as I mentioned earlier, this crate is meant for you to create
89//! _your own_ format like macros, so you should package all of this
90//! up into a single macro, like this:
91//!
92//! ```rust
93//! #![feature(decl_macro)]
94//! use format_like::format_like;
95//!
96//! #[derive(Debug, PartialEq)]
97//! struct CommentedString(String, Vec<(usize, String)>);
98//!
99//! let comment = "there is an error in this word";
100//! let text = "text";
101//! let range = 0..usize::MAX;
102//!
103//! let commented_string = commented_string!(
104//!     "This is <comment>regluar {}, interpolated and commented {range.end}",
105//!     text
106//! );
107//!
108//! assert_eq!(
109//!     commented_string,
110//!     CommentedString(
111//!         "This is regluar text, interpolated and commented 18446744073709551615".to_string(),
112//!         vec![(8, "there is an error in this word".to_string())]
113//!     )
114//! );
115//!
116//! macro commented_string($($parts:tt)*) {
117//!     format_like!(
118//!         parse_str,
119//!         [('{', parse_interpolation, false), ('<', parse_comment, true)],
120//!         CommentedString(String::new(), Vec::new()),
121//!         $($parts)*
122//!     )
123//! }
124//!
125//! macro parse_str($value:expr, $str:literal) {{
126//!     let mut commented_string = $value;
127//!     commented_string.0.push_str($str);
128//!     commented_string
129//! }}
130//!
131//! macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{
132//!     let CommentedString(string, comments) = $value;
133//!     let string = format!(concat!("{}{", $modif, "}"), string, $added);
134//!     CommentedString(string, comments)
135//! }}
136//!
137//! macro parse_comment($value:expr, $_modif:literal, $added:expr) {{
138//!     let mut commented_string = $value;
139//!     commented_string.1.push((commented_string.0.len(), $added.to_string()));
140//!     commented_string
141//! }}
142//! ```
143//!
144//! ## Forced inlining
145//!
146//! You might be wondering: What are the `false` and `true` in the
147//! second argument of [`format_like!`] used for?
148//!
149//! Well, they determine whether an argument _must_ be inlined (i.e. be
150//! placed within the string like `{arg}`). This is useful when you
151//! want to limit the types of arguments that a macro should handle.
152//!
153//! As you might have seen earlier, [`format_like!`] accepts member
154//! access, like `{range.end}`. If you force a parameter to always be
155//! placed inline, that limits the types of tokens your macro must be
156//! able to handle, so you could rewrite the `parse_comment` macro to
157//! be:
158//!
159//! ```rust
160//! #![feature(decl_macro)]
161//! macro parse_comment($value:expr, $modif:literal, $($identifier:ident).*) {{
162//!     // innards
163//! }}
164//! ```
165//!
166//! While this may not seem useful, it comes with two interesting
167//! abilities:
168//!
169//! 1. If arguments must be inlined, you are allowed to leave the
170//!    pair empty, like `<>`, and you can handle this situation
171//!    differently if you want.
172//! 2. By accessing the `$identifier`s directly, you can manipulate
173//!    them in whichever way you want; heck, they may not even point to
174//!    any actual variable in the code, and could be some sort of
175//!    differently handled string literal.
176//!
177//! ## Motivation
178//!
179//! Even after reading all that, I wouldn't be surprised if you
180//! haven't found any particular use for this crate, and that's fine.
181//!
182//! But here is what was _my_ motivation for creating it:
183//!
184//! In my _in development_ text editor [Duat], there _used to be_ a
185//! `text` macro, which created a `Text` struct, which was essentially
186//! a [`String`] with formatting `Tag`s added on to it.
187//!
188//! It used to work like this:
189//!
190//! ```rust,ignore
191//! let text = text!("start " [RedColor.subvalue] variable " " other_variable " ");
192//! ```
193//!
194//! This macro was a simple declarative macro, so while it was easy to
195//! implement, there were several drawbacks to its design:
196//!
197//! - It was ignored by rustfmt;
198//! - It didn't look like Rust;
199//! - tree-sitter failed at syntax highlighting it;
200//! - It didn't look like Rust;
201//! - Way too much space was occupied by simple things like `" "`;
202//! - It didn't look like Rust;
203//!
204//! And now I have replaced the old `text` macro with a new version,
205//! based on `format_like!`, which makes for a much cleaner design:
206//!
207//! ```rust,ignore
208//! let text = text!("start [RedColor.subvalue]{variable} {other_variable} ");
209//! ```
210//!
211//! [`Display`]: std::fmt::Display
212//! [Duat]: https://github.com/AhoyISki/duat
213use std::ops::Range;
214
215use proc_macro::TokenStream;
216use proc_macro2::Span;
217use quote::{format_ident, quote};
218use syn::{
219    Expr, Ident, LitBool, LitChar, LitStr, Path, Token, bracketed, parenthesized,
220    parse::{Parse, ParseBuffer},
221    parse_macro_input,
222    spanned::Spanned,
223};
224
225/// A macro for creating format-like macros
226///
227/// ```rust
228/// #![feature(decl_macro)]
229/// use format_like::format_like;
230///
231/// #[derive(Debug, PartialEq)]
232/// struct CommentedString(String, Vec<(usize, String)>);
233///
234/// let comment = "there is an error in this word";
235/// let text = "text";
236/// let range = 0..usize::MAX;
237///
238/// let commented_string = commented_string!(
239///     "This is <comment>worng {}, and this is the end of the range {range.end}",
240///     text
241/// );
242///
243/// assert_eq!(
244///     commented_string,
245///     CommentedString(
246///         "This is worng text, and this is the end of the range 18446744073709551615".to_string(),
247///         vec![(8, "there is an error in this word".to_string())]
248///     )
249/// );
250///
251/// macro commented_string($($parts:tt)*) {
252///     format_like!(
253///         parse_str,
254///         [('{', parse_interpolation, false), ('<', parse_comment, true)],
255///         CommentedString(String::new(), Vec::new()),
256///         $($parts)*
257///     )
258/// }
259///
260/// macro parse_str($value:expr, $str:literal) {{
261///     let mut commented_string = $value;
262///     commented_string.0.push_str($str);
263///     commented_string
264/// }}
265///
266/// macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{
267///     let CommentedString(string, comments) = $value;
268///     let string = format!(concat!("{}{", $modif, "}"), string, $added);
269///     CommentedString(string, comments)
270/// }}
271///
272/// macro parse_comment($value:expr, $_modif:literal, $added:expr) {{
273///     let mut commented_string = $value;
274///     commented_string.1.push((commented_string.0.len(), $added.to_string()));
275///     commented_string
276/// }}
277/// ```
278#[proc_macro]
279pub fn format_like(input: TokenStream) -> TokenStream {
280    let fmt_like = parse_macro_input!(input as FormatLike);
281    let lit_str = &fmt_like.str;
282    let str = lit_str.value();
283    let arg_parsers = &fmt_like.arg_parsers;
284
285    let mut args = Vec::new();
286
287    let mut arg: Option<CurrentArg> = None;
288    let mut unescaped_rhs: Option<(usize, char)> = None;
289    let mut push_new_ident = true;
290    let mut positional_needed = 0;
291
292    let str_span = |r: Range<usize>| lit_str.token().subspan(r.start + 1..r.end + 1).unwrap();
293
294    for (i, char) in str.char_indices() {
295        if let Some((j, p, mut idents, mut modif)) = arg.take() {
296            let (lhs, rhs) = &arg_parsers[p].delims;
297            if char == *rhs {
298                let modif = match modif {
299                    Some(range) => {
300                        let str =
301                            unsafe { str::from_utf8_unchecked(&str.as_bytes()[range.clone()]) };
302                        let str = LitStr::new(str, str_span(range));
303
304                        quote! { #str }
305                    }
306                    None => quote! { "" },
307                };
308
309                if idents.is_empty() {
310                    if arg_parsers[p].inline_only {
311                        args.push(Arg::Inlined(p, Vec::new(), modif));
312                    } else {
313                        positional_needed += 1;
314                        args.push(Arg::Positional(p, j..i + 1, modif));
315                    }
316                } else if push_new_ident {
317                    return compile_err(
318                        str_span(i - 1..i),
319                        "invalid format string: field access expected an identifier",
320                    );
321                } else {
322                    let idents = idents
323                        .into_iter()
324                        .map(|range| {
325                            let mut ident = format_ident!("{}", unsafe {
326                                str::from_utf8_unchecked(&str.as_bytes()[range.clone()])
327                            });
328                            ident.set_span(str_span(range));
329                            ident
330                        })
331                        .collect();
332
333                    args.push(Arg::Inlined(p, idents, modif));
334                }
335
336                continue;
337            } else if char == lhs.value() && idents.is_empty() {
338                // If arg was empty, that means the delimiter was repeated, so escape
339                // it.
340                extend_str_arg(&mut args, char, i - 1);
341                continue;
342            }
343
344            // We might have mismatched delimiters
345            if arg_parsers
346                .iter()
347                .any(|ap| char == ap.delims.0.value() || char == ap.delims.1)
348            {
349                let mut err = syn::Error::new(
350                    str_span(i..i + 1),
351                    "invalid format string: wrong match for delimiter",
352                );
353                err.combine(syn::Error::new(
354                    str_span(j..j + 1),
355                    format!("from this delimiter, expected {rhs}"),
356                ));
357                let compile_err = err.into_compile_error();
358
359                // Since this should return an Expr, we need to brace it.
360                let err = quote! {{
361                    #compile_err
362                }};
363
364                return err.into();
365            } else if char.is_alphanumeric() || char == '_' || modif.is_some() {
366                if let Some(modif) = &mut modif {
367                    modif.end = i + 1;
368                } else if let Some(last) = idents.last_mut()
369                    && !push_new_ident
370                {
371                    last.end = i + 1;
372                } else {
373                    idents.push(i..i + 1);
374                    push_new_ident = false;
375                }
376            } else if char == '.' {
377                if let Some(modif) = &mut modif {
378                    modif.end = i + 1;
379                } else if push_new_ident {
380                    // Can't start an identifier list with '.' or put multiple '.'s in a
381                    // row.
382                    return compile_err(
383                        str_span(i..i + 1),
384                        "invalid format string: unexpected '.' here",
385                    );
386                } else {
387                    push_new_ident = true;
388                }
389            } else if char == ':' {
390                if let Some(modif) = &mut modif {
391                    modif.end = i + 1;
392                } else {
393                    modif = Some(i + 1..i + 1);
394                }
395            } else {
396                return compile_err(
397                    str_span(i..i + 1),
398                    format!("invalid format string: unexpected {char} here"),
399                );
400            }
401
402            arg = Some((j, p, idents, modif));
403        } else if let Some(p) = arg_parsers
404            .iter()
405            .position(|ap| char == ap.delims.0.value() || char == ap.delims.1)
406        {
407            // If the char is a left delimiter, begin an argument.
408            // If it is a right delimiter, handle dangling right parameter
409            // scenarios.
410            if char == arg_parsers[p].delims.0.value() {
411                push_new_ident = true;
412                arg = Some((i, p, Vec::new(), None));
413            } else if let Some((j, unescaped)) = unescaped_rhs {
414                // Double delimiters are escaped.
415                if char == unescaped {
416                    unescaped_rhs = None;
417                    extend_str_arg(&mut args, char, i);
418                } else {
419                    return compile_err(
420                        str_span(j..j + 1),
421                        format!("invalid format string: unmatched {unescaped} found"),
422                    );
423                }
424            } else {
425                unescaped_rhs = Some((i, char));
426            }
427        } else if let Some((j, unescaped)) = unescaped_rhs {
428            return compile_err(
429                str_span(j..j + 1),
430                format!("invalid format string: unmatched {unescaped} found"),
431            );
432        } else {
433            extend_str_arg(&mut args, char, i);
434        }
435    }
436
437    if let Some((i, unescaped)) = unescaped_rhs {
438        return compile_err(
439            str_span(i..i + 1),
440            format!("invalid format string: unmatched {unescaped} found"),
441        );
442    }
443
444    let expr = fmt_like.initial;
445    let mut token_stream = quote! { #expr };
446
447    let positional_provided = fmt_like.exprs.len();
448    let mut exprs = fmt_like.exprs.into_iter();
449
450    for arg in args {
451        token_stream = match arg {
452            Arg::Str(string, range) => {
453                let str = LitStr::new(&string, str_span(range));
454                let parser = &fmt_like.str_parser;
455
456                quote! {
457                    #parser!(#token_stream, #str)
458                }
459            }
460            Arg::Positional(p, range, modif) => {
461                if let Some(expr) = exprs.next() {
462                    let parser = &fmt_like.arg_parsers[p].parser;
463
464                    quote! {
465                        #parser!(#token_stream, #modif, #expr)
466                    }
467                } else {
468                    let npl = if positional_needed == 1 { "" } else { "s" };
469                    let pverb = if positional_provided == 1 {
470                        "is"
471                    } else {
472                        "are"
473                    };
474                    let ppl = if positional_provided == 1 { "" } else { "s" };
475
476                    return compile_err(
477                        str_span(range),
478                        format!(
479                            "{positional_needed} positional argument{npl} in format string, but there {pverb} {positional_provided} argument{ppl}"
480                        ),
481                    );
482                }
483            }
484            Arg::Inlined(p, idents, modif) => {
485                let parser = &fmt_like.arg_parsers[p].parser;
486
487                quote! {
488                    #parser!(#token_stream, #modif, #(#idents).*)
489                }
490            }
491        }
492    }
493
494    // There should be no positional arguments left.
495    if let Some(expr) = exprs.next() {
496        return compile_err(expr.span(), "argument never used");
497    }
498
499    token_stream.into()
500}
501
502struct ArgParser {
503    delims: (LitChar, char),
504    parser: Path,
505    inline_only: bool,
506}
507
508impl ArgParser {
509    fn new(input: &ParseBuffer) -> syn::Result<Self> {
510        const VALID_DELIMS: &[[char; 2]] = &[['{', '}'], ['(', ')'], ['[', ']'], ['<', '>']];
511        let elems;
512        parenthesized!(elems in input);
513
514        let delims = {
515            let left: LitChar = elems.parse()?;
516
517            if let Some([_, right]) = VALID_DELIMS.iter().find(|[rhs, _]| left.value() == *rhs) {
518                (left, *right)
519            } else {
520                return Err(syn::Error::new_spanned(left, "is not a valid delimiter"));
521            }
522        };
523
524        elems.parse::<Token![,]>()?;
525        let parser = elems.parse()?;
526        elems.parse::<Token![,]>()?;
527        let inline_only = elems.parse::<LitBool>()?.value();
528
529        Ok(Self { delims, parser, inline_only })
530    }
531}
532
533struct FormatLike {
534    str_parser: Path,
535    arg_parsers: Vec<ArgParser>,
536    initial: Expr,
537    str: LitStr,
538    exprs: Vec<Expr>,
539}
540
541impl Parse for FormatLike {
542    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
543        let str_parser = input.parse()?;
544        input.parse::<Token![,]>()?;
545
546        let arg_parsers: Vec<ArgParser> = {
547            let arg_parsers;
548            bracketed!(arg_parsers in input);
549            arg_parsers
550                .parse_terminated(ArgParser::new, Token![,])?
551                .into_iter()
552                .collect()
553        };
554
555        if let Some((lhs, rhs)) = arg_parsers.iter().enumerate().find_map(|(i, lhs)| {
556            arg_parsers.iter().enumerate().find_map(|(j, rhs)| {
557                (i != j)
558                    .then(|| (rhs.delims.1 == lhs.delims.1).then_some((lhs, rhs)))
559                    .flatten()
560            })
561        }) {
562            let l_err = syn::Error::new_spanned(&lhs.delims.0, "this delimiter");
563            let mut r_err = syn::Error::new_spanned(&rhs.delims.0, "is the same as this");
564            r_err.combine(l_err);
565            return Err(r_err);
566        }
567        input.parse::<Token![,]>()?;
568
569        let initial = input.parse()?;
570        input.parse::<Token![,]>()?;
571
572        let str = input.parse()?;
573
574        let exprs = if !input.is_empty() {
575            input.parse::<Token![,]>()?;
576            input
577                .parse_terminated(Expr::parse, Token![,])?
578                .into_iter()
579                .collect()
580        } else {
581            Vec::new()
582        };
583
584        Ok(Self {
585            str_parser,
586            arg_parsers,
587            initial,
588            str,
589            exprs,
590        })
591    }
592}
593
/// A piece of the format string, in the order it appears.
594enum Arg {
    /// Literal text, together with the byte range it occupies in the
    /// string's value (used to build the span of the emitted literal).
595    Str(String, Range<usize>),
    /// An empty delimiter pair that consumes the next positional argument:
    /// index of the handling `ArgParser`, byte range of the pair (used for
    /// error spans) and the modifier tokens.
596    Positional(usize, Range<usize>, proc_macro2::TokenStream),
    /// An argument written inside of the pair: index of the handling
    /// `ArgParser`, the '.'-separated identifiers (empty for an empty
    /// inline-only pair) and the modifier tokens.
597    Inlined(usize, Vec<Ident>, proc_macro2::TokenStream),
598}
599
600fn extend_str_arg(args: &mut Vec<Arg>, char: char, i: usize) {
601    if let Some(Arg::Str(string, range)) = args.last_mut() {
602        string.push(char);
603        range.end = i + 1;
604    } else {
605        args.push(Arg::Str(String::from(char), i..i + 1))
606    }
607}
608
609fn compile_err(span: Span, msg: impl std::fmt::Display) -> TokenStream {
610    let compile_err = syn::Error::new(span, msg).into_compile_error();
611
612    let err = quote! {{
613        #compile_err
614    }};
615
616    err.into()
617}
618
/// State of the argument being scanned: byte index of its opening
/// delimiter, index of the handling `ArgParser`, byte ranges of the
/// identifiers read so far and, once a ':' was seen, the byte range of the
/// modifier.
619type CurrentArg = (usize, usize, Vec<Range<usize>>, Option<Range<usize>>);