format_like/
lib.rs

1//! A macro for creating format-like macros
2//!
3//! Have you ever wanted to emulate the functionality of the `format!`
4//! family of macros, but with an output that is not a [`String`] or
5//! something built from a [`String`]?
6//!
7//! No?
8//!
9//! Well, this might still be interesting for you.
10//!
11//! `format-like` aims to let _you_ decide how to interpret what is
12//! inside `{}` pairs, instead of calling something like
13//! `std::fmt::Display::fmt(&value)`.
14//!
15//! Additionally, it lets you create 3 other types of bracket pairs:
16//! `()`, `[]` and `<>`, so you can interpret things in even more
17//! ways! This does of course come with the regular escaping that the
18//! [`format!`] macro does, so `{{` is escaped to just `{`, the same
19//! being the case for the other delimiters as well.
20//!
21//! Here's how it works:
22//!
23//! ```rust
24//! # #![feature(decl_macro)]
25//! use format_like::format_like;
26//!
27//! struct CommentedString(String, Vec<(usize, String)>);
28//!
29//! let comment = "there is an error in this word";
30//! let text = "text";
31//! let range = 0..usize::MAX;
32//!
33//! let commented_string = format_like!(
34//!     parse_str,
35//!     [('{', parse_interpolation, false), ('<', parse_comment, true)],
36//!     CommentedString(String::new(), Vec::new()),
37//!     "This is <comment>regluar {}, interpolated and commented {range.end}",
38//!     text
39//! );
40//! # macro parse_str($value:expr, $str:literal) {{ $value }}
41//! # macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{ $value }}
42//! # macro parse_comment($value:expr, $modif:literal, $added:expr) {{ $value }}
43//! ```
44//!
45//! In this example, the `{}` should work as intended, but you also
46//! have access to `<>` interpolation. Inside `<>`, a comment will be
47//! added, with the associated `usize` being its position in the
48//! [`String`].
49//!
50//! This will all be done through the `parse_str`,
51//! `parse_interpolation` and `parse_comment` macros:
52//!
53//! ```rust
54//! #![feature(decl_macro)]
55//! macro parse_str($value:expr, $str:literal) {{
56//!     let mut commented_string = $value;
57//!     commented_string.0.push_str($str);
58//!     commented_string
59//! }}
60//!
61//! macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{
62//!     let CommentedString(string, comments) = $value;
63//!     let string = format!(concat!("{}{", $modif, "}"), string, $added);
64//!     CommentedString(string, comments)
65//! }}
66//!
67//! macro parse_comment($value:expr, $_modif:literal, $added:expr) {{
68//!     let mut commented_string = $value;
69//!     commented_string
70//!         .1
71//!         .push((commented_string.0.len(), $added.to_string()));
72//!     commented_string
73//! }}
74//! ```
75//!
76//! The `parse_str` macro will be responsible for handling the non
77//! `{}` or `<>` parts of the literal `&str`. The `parse_comment` and
78//! `parse_interpolation` macros will handle what's inside the `<>`
79//! and `{}` pairs, respectively.
80//!
81//! `parse_comment` and `parse_interpolation` must have three
82//! parameters, one for the `value` being modified (in this case, a
83//! `CommentedString`), one for the modifier (`"?"`, `"#?"`, `".3"`, etc.),
84//! which might come after a `":"` in the pair, and one for the object
85//! being added (it's [`Display`] objects in this case, but
86//! it could be anything else).
87//!
88//! Now, as I mentioned earlier, this crate is meant for you to create
89//! _your own_ format like macros, so you should package all of this
90//! up into a single macro, like this:
91//!
92//! ```rust
93//! #![feature(decl_macro)]
94//! use format_like::format_like;
95//!
96//! #[derive(Debug, PartialEq)]
97//! struct CommentedString(String, Vec<(usize, String)>);
98//!
99//! let comment = "there is an error in this word";
100//! let text = "text";
101//! let range = 0..usize::MAX;
102//!
103//! let commented_string = commented_string!(
104//!     "This is <comment>regluar {}, interpolated and commented {range.end}",
105//!     text
106//! );
107//!
108//! assert_eq!(
109//!     commented_string,
110//!     CommentedString(
111//!         "This is regluar text, interpolated and commented 18446744073709551615".to_string(),
112//!         vec![(8, "there is an error in this word".to_string())]
113//!     )
114//! );
115//!
116//! macro commented_string($($parts:tt)*) {
117//!     format_like!(
118//!         parse_str,
119//!         [('{', parse_interpolation, false), ('<', parse_comment, true)],
120//!         CommentedString(String::new(), Vec::new()),
121//!         $($parts)*
122//!     )
123//! }
124//!
125//! macro parse_str($value:expr, $str:literal) {{
126//!     let mut commented_string = $value;
127//!     commented_string.0.push_str($str);
128//!     commented_string
129//! }}
130//!
131//! macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{
132//!     let CommentedString(string, comments) = $value;
133//!     let string = format!(concat!("{}{", $modif, "}"), string, $added);
134//!     CommentedString(string, comments)
135//! }}
136//!
137//! macro parse_comment($value:expr, $_modif:literal, $added:expr) {{
138//!     let mut commented_string = $value;
139//!     commented_string.1.push((commented_string.0.len(), $added.to_string()));
140//!     commented_string
141//! }}
142//! ```
143//!
144//! ## Forced inlining
145//!
146//! You might be wondering: What are the `false` and `true` in the
147//! second argument of [`format_like!`] used for?
148//!
149//! Well, they determine whether an argument _must_ be inlined (i.e. be
150//! placed within the string like `{arg}`). This is useful when you
151//! want to limit the types of arguments that a macro should handle.
152//!
153//! As you might have seen earlier, [`format_like!`] accepts member
154//! access, like `{range.end}`. If you force a parameter to always be
155//! placed inline, that limits the types of tokens your macro must be
156//! able to handle, so you could rewrite the `parse_comment` macro to
157//! be:
158//!
159//! ```rust
160//! #![feature(decl_macro)]
161//! macro parse_comment($value:expr, $modif:literal, $($identifier:ident).*) {{
162//!     // innards
163//! }}
164//! ```
165//!
166//! While this may not seem useful, it comes with two interesting
167//! abilities:
168//!
169//! 1. If arguments must be inlined, you are allowed to leave the
170//!    pair empty, like `<>`, and you can handle this situation
171//!    differently if you want.
172//! 2. By accessing the `$identifier`s directly, you can manipulate
173//!    them in whichever way you want. Heck, they may not even point
174//!    to any actual variable in the code, and could be some sort of
175//!    differently handled string literal.
176//!
177//! ## Motivation
178//!
179//! Even after reading all that, I wouldn't be surprised if you
180//! haven't found any particular use for this crate, and that's fine.
181//!
182//! But here is what was _my_ motivation for creating it:
183//!
184//! In my _in development_ text editor [Duat], there _used to be_ a
185//! `text` macro, which created a `Text` struct, which was essentially
186//! a [`String`] with formatting `Tag`s added on to it.
187//!
188//! It used to work like this:
189//!
190//! ```rust,ignore
191//! let text = text!("start " [RedColor.subvalue] variable " " other_variable " ");
192//! ```
193//!
194//! This macro was a simple declarative macro, so while it was easy to
195//! implement, there were several drawbacks to its design:
196//!
197//! - It was ignored by rustfmt;
198//! - It didn't look like Rust;
199//! - tree-sitter failed at syntax highlighting it;
200//! - It didn't look like Rust;
201//! - Way too much space was occupied by simple things like `" "`;
202//! - It didn't look like Rust;
203//!
204//! And now I have replaced the old `text` macro with a new version,
205//! based on `format_like!`, which makes for a much cleaner design:
206//!
207//! ```rust,ignore
208//! let text = text!("start [RedColor.subvalue]{variable} {other_variable} ");
209//! ```
210//!
211//! [`Display`]: std::fmt::Display
212//! [Duat]: https://github.com/AhoyISki/duat
213use std::ops::Range;
214
215use proc_macro::TokenStream;
216use proc_macro2::Span;
217use quote::{format_ident, quote};
218use syn::{
219    Expr, Ident, LitBool, LitChar, LitStr, Path, Token, bracketed, parenthesized,
220    parse::{Parse, ParseBuffer},
221    parse_macro_input,
222    spanned::Spanned,
223};
224
225/// A macro for creating format-like macros
226///
227/// ```rust
228/// #![feature(decl_macro)]
229/// use format_like::format_like;
230///
231/// #[derive(Debug, PartialEq)]
232/// struct CommentedString(String, Vec<(usize, String)>);
233///
234/// let comment = "there is an error in this word";
235/// let text = "text";
236/// let range = 0..usize::MAX;
237///
238/// let commented_string = commented_string!(
239///     "This is <comment>worng {}, and this is the end of the range {range.end}",
240///     text
241/// );
242///
243/// assert_eq!(
244///     commented_string,
245///     CommentedString(
246///         "This is worng text, and this is the end of the range 18446744073709551615".to_string(),
247///         vec![(8, "there is an error in this word".to_string())]
248///     )
249/// );
250///
251/// macro commented_string($($parts:tt)*) {
252///     format_like!(
253///         parse_str,
254///         [('{', parse_interpolation, false), ('<', parse_comment, true)],
255///         CommentedString(String::new(), Vec::new()),
256///         $($parts)*
257///     )
258/// }
259///
260/// macro parse_str($value:expr, $str:literal) {{
261///     let mut commented_string = $value;
262///     commented_string.0.push_str($str);
263///     commented_string
264/// }}
265///
266/// macro parse_interpolation($value:expr, $modif:literal, $added:expr) {{
267///     let CommentedString(string, comments) = $value;
268///     let string = format!(concat!("{}{", $modif, "}"), string, $added);
269///     CommentedString(string, comments)
270/// }}
271///
272/// macro parse_comment($value:expr, $_modif:literal, $added:expr) {{
273///     let mut commented_string = $value;
274///     commented_string.1.push((commented_string.0.len(), $added.to_string()));
275///     commented_string
276/// }}
277/// ```
278#[proc_macro]
279pub fn format_like(input: TokenStream) -> TokenStream {
280    let fmt_like = parse_macro_input!(input as FormatLike);
281    let lit_str = &fmt_like.str;
282    let str = lit_str.value();
283    let arg_parsers = &fmt_like.arg_parsers;
284
285    let mut args = Vec::new();
286
287    let mut arg: Option<CurrentArg> = None;
288    let mut unescaped_rhs: Option<(usize, char)> = None;
289    let mut push_new_ident = true;
290    let mut positional_needed = 0;
291
292    let str_span = |r: Range<usize>| lit_str.token().subspan(r.start + 1..r.end + 1).unwrap();
293
294    for (i, char) in str.char_indices() {
295        if let Some((j, p, mut idents, mut modif)) = arg.take() {
296            let (lhs, rhs) = &arg_parsers[p].delims;
297            if char == *rhs {
298                let modif = match modif {
299                    Some(range) => {
300                        let str =
301                            unsafe { str::from_utf8_unchecked(&str.as_bytes()[range.clone()]) };
302                        let str = LitStr::new(str, str_span(range));
303
304                        quote! { #str }
305                    }
306                    None => quote! { "" },
307                };
308
309                if idents.is_empty() {
310                    if arg_parsers[p].inline_only {
311                        args.push(Arg::Inlined(p, Vec::new(), modif));
312                    } else {
313                        positional_needed += 1;
314                        args.push(Arg::Positional(p, j..i + 1, modif));
315                    }
316                } else if push_new_ident {
317                    return compile_err(
318                        str_span(i - 1..i),
319                        "invalid format string: field access expected an identifier",
320                    );
321                } else {
322                    let idents = idents
323                        .into_iter()
324                        .map(|range| {
325                            let mut ident = format_ident!("{}", unsafe {
326                                str::from_utf8_unchecked(&str.as_bytes()[range.clone()])
327                            });
328                            ident.set_span(str_span(range));
329                            ident
330                        })
331                        .collect();
332
333                    args.push(Arg::Inlined(p, idents, modif));
334                }
335
336                continue;
337            } else if char == lhs.value() && idents.is_empty() {
338                // If arg was empty, that means the delimiter was repeated, so escape
339                // it.
340                extend_str_arg(&mut args, i);
341                continue;
342            }
343
344            // We might have mismatched delimiters
345            if arg_parsers
346                .iter()
347                .any(|ap| char == ap.delims.0.value() || char == ap.delims.1)
348            {
349                let mut err = syn::Error::new(
350                    str_span(i..i + 1),
351                    "invalid format string: wrong match for delimiter",
352                );
353                err.combine(syn::Error::new(
354                    str_span(j..j + 1),
355                    format!("from this delimiter, expected {rhs}"),
356                ));
357                let compile_err = err.into_compile_error();
358
359                // Since this should return an Expr, we need to brace it.
360                let err = quote! {{
361                    #compile_err
362                }};
363
364                return err.into();
365            } else if char.is_alphanumeric() || char == '_' || modif.is_some() {
366                if let Some(modif) = &mut modif {
367                    modif.end = i + 1;
368                } else if let Some(last) = idents.last_mut()
369                    && !push_new_ident
370                {
371                    last.end = i + 1;
372                } else {
373                    idents.push(i..i + 1);
374                    push_new_ident = false;
375                }
376            } else if char == '.' {
377                if let Some(modif) = &mut modif {
378                    modif.end = i + 1;
379                } else if push_new_ident {
380                    // Can't start an identifier list with '.' or put multiple '.'s in a
381                    // row.
382                    return compile_err(
383                        str_span(i..i + 1),
384                        "invalid format string: unexpected '.' here",
385                    );
386                } else {
387                    push_new_ident = true;
388                }
389            } else if char == ':' {
390                if let Some(modif) = &mut modif {
391                    modif.end = i + 1;
392                } else {
393                    modif = Some(i + 1..i + 1);
394                }
395            } else {
396                return compile_err(
397                    str_span(i..i + 1),
398                    format!("invalid format string: unexpected {char} here"),
399                );
400            }
401
402            arg = Some((j, p, idents, modif));
403        } else if let Some(p) = arg_parsers
404            .iter()
405            .position(|ap| char == ap.delims.0.value() || char == ap.delims.1)
406        {
407            // If the char is a left delimiter, begin an argument.
408            // If it is a right delimiter, handle dangling right parameter
409            // scenarios.
410            if char == arg_parsers[p].delims.0.value() {
411                push_new_ident = true;
412                arg = Some((i, p, Vec::new(), None));
413            } else if let Some((j, unescaped)) = unescaped_rhs {
414                // Double delimiters are escaped.
415                if char == unescaped {
416                    unescaped_rhs = None;
417                    extend_str_arg(&mut args, i);
418                } else {
419                    return compile_err(
420                        str_span(j..j + 1),
421                        format!("invalid format string: unmatched {unescaped} found"),
422                    );
423                }
424            } else {
425                unescaped_rhs = Some((i, char));
426            }
427        } else if let Some((j, unescaped)) = unescaped_rhs {
428            return compile_err(
429                str_span(j..j + 1),
430                format!("invalid format string: unmatched {unescaped} found"),
431            );
432        } else {
433            extend_str_arg(&mut args, i);
434        }
435    }
436
437    if let Some((i, unescaped)) = unescaped_rhs {
438        return compile_err(
439            str_span(i..i + 1),
440            format!("invalid format string: unmatched {unescaped} found"),
441        );
442    }
443
444    let expr = fmt_like.initial;
445    let mut token_stream = quote! { #expr };
446
447    let positional_provided = fmt_like.exprs.len();
448    let mut exprs = fmt_like.exprs.into_iter();
449
450    for arg in args {
451        token_stream = match arg {
452            Arg::Str(range) => {
453                let str = unsafe { str::from_utf8_unchecked(&str.as_bytes()[range.clone()]) };
454                let str = LitStr::new(str, str_span(range));
455                let parser = &fmt_like.str_parser;
456
457                quote! {
458                    #parser!(#token_stream, #str)
459                }
460            }
461            Arg::Positional(p, range, modif) => {
462                if let Some(expr) = exprs.next() {
463                    let parser = &fmt_like.arg_parsers[p].parser;
464
465                    quote! {
466                        #parser!(#token_stream, #modif, #expr)
467                    }
468                } else {
469                    let npl = if positional_needed == 1 { "" } else { "s" };
470                    let pverb = if positional_provided == 1 {
471                        "is"
472                    } else {
473                        "are"
474                    };
475                    let ppl = if positional_provided == 1 { "" } else { "s" };
476
477                    return compile_err(
478                        str_span(range),
479                        format!(
480                            "{positional_needed} positional argument{npl} in format string, but there {pverb} {positional_provided} argument{ppl}"
481                        ),
482                    );
483                }
484            }
485            Arg::Inlined(p, idents, modif) => {
486                let parser = &fmt_like.arg_parsers[p].parser;
487
488                quote! {
489                    #parser!(#token_stream, #modif, #(#idents).*)
490                }
491            }
492        }
493    }
494
495    // There should be no positional arguments left.
496    if let Some(expr) = exprs.next() {
497        return compile_err(expr.span(), "argument never used");
498    }
499
500    token_stream.into()
501}
502
503struct ArgParser {
504    delims: (LitChar, char),
505    parser: Path,
506    inline_only: bool,
507}
508
509impl ArgParser {
510    fn new(input: &ParseBuffer) -> syn::Result<Self> {
511        const VALID_DELIMS: &[[char; 2]] = &[['{', '}'], ['(', ')'], ['[', ']'], ['<', '>']];
512        let elems;
513        parenthesized!(elems in input);
514
515        let delims = {
516            let left: LitChar = elems.parse()?;
517
518            if let Some([_, right]) = VALID_DELIMS.iter().find(|[rhs, _]| left.value() == *rhs) {
519                (left, *right)
520            } else {
521                return Err(syn::Error::new_spanned(left, "is not a valid delimiter"));
522            }
523        };
524
525        elems.parse::<Token![,]>()?;
526        let parser = elems.parse()?;
527        elems.parse::<Token![,]>()?;
528        let inline_only = elems.parse::<LitBool>()?.value();
529
530        Ok(Self { delims, parser, inline_only })
531    }
532}
533
534struct FormatLike {
535    str_parser: Path,
536    arg_parsers: Vec<ArgParser>,
537    initial: Expr,
538    str: LitStr,
539    exprs: Vec<Expr>,
540}
541
542impl Parse for FormatLike {
543    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
544        let str_parser = input.parse()?;
545        input.parse::<Token![,]>()?;
546
547        let arg_parsers: Vec<ArgParser> = {
548            let arg_parsers;
549            bracketed!(arg_parsers in input);
550            arg_parsers
551                .parse_terminated(ArgParser::new, Token![,])?
552                .into_iter()
553                .collect()
554        };
555
556        if let Some((lhs, rhs)) = arg_parsers.iter().enumerate().find_map(|(i, lhs)| {
557            arg_parsers.iter().enumerate().find_map(|(j, rhs)| {
558                (i != j)
559                    .then(|| (rhs.delims.1 == lhs.delims.1).then_some((lhs, rhs)))
560                    .flatten()
561            })
562        }) {
563            let l_err = syn::Error::new_spanned(&lhs.delims.0, "this delimiter");
564            let mut r_err = syn::Error::new_spanned(&rhs.delims.0, "is the same as this");
565            r_err.combine(l_err);
566            return Err(r_err);
567        }
568        input.parse::<Token![,]>()?;
569
570        let initial = input.parse()?;
571        input.parse::<Token![,]>()?;
572
573        let str = input.parse()?;
574
575        let exprs = if !input.is_empty() {
576            input.parse::<Token![,]>()?;
577            input
578                .parse_terminated(Expr::parse, Token![,])?
579                .into_iter()
580                .collect()
581        } else {
582            Vec::new()
583        };
584
585        Ok(Self {
586            str_parser,
587            arg_parsers,
588            initial,
589            str,
590            exprs,
591        })
592    }
593}
594
595enum Arg {
596    Str(Range<usize>),
597    Positional(usize, Range<usize>, proc_macro2::TokenStream),
598    Inlined(usize, Vec<Ident>, proc_macro2::TokenStream),
599}
600
601fn extend_str_arg(args: &mut Vec<Arg>, start_of_char: usize) {
602    if let Some(Arg::Str(range)) = args.last_mut() {
603        range.end = start_of_char + 1;
604    } else {
605        args.push(Arg::Str(start_of_char..start_of_char + 1))
606    }
607}
608
609fn compile_err(span: Span, msg: impl std::fmt::Display) -> TokenStream {
610    let compile_err = syn::Error::new(span, msg).into_compile_error();
611
612    let err = quote! {{
613        #compile_err
614    }};
615
616    err.into()
617}
618
/// State of the argument currently being read, in order: byte index of
/// its opening delimiter, index of its `ArgParser`, byte ranges of the
/// identifiers found so far, and byte range of the modifier, if a `:` was
/// found.
type CurrentArg = (
    usize,
    usize,
    Vec<Range<usize>>,
    Option<Range<usize>>,
);