Skip to main content

buffa_codegen/
idents.rs

1//! Rust identifier and path construction helpers.
2//!
3//! These are shared between buffa's codegen and downstream code generators
4//! (e.g. `connectrpc-codegen`) that emit Rust code alongside buffa's message
5//! types and need identical keyword-escaping and path-tokenization behavior.
6//!
//! The guarantee is that if buffa generates
//! `pub mod r#type { pub struct Foo { ... } }`, downstream callers using
//! [`rust_path_to_tokens`]`("type::Foo")` produce the matching `r#type::Foo`
//! reference.
10
11use proc_macro2::{Ident, Span, TokenStream};
12use quote::{format_ident, quote};
13
14/// Parse a `::`-separated Rust path string into a [`TokenStream`], using raw
15/// identifiers (`r#type`) for segments that are Rust keywords.
16///
17/// Used instead of `syn::parse_str::<syn::Type>` because the latter cannot
18/// handle raw identifiers in path position: `"google::type::LatLng"` would
19/// fail to parse because `type` is a keyword, but this function correctly
20/// produces `google::r#type::LatLng`.
21///
22/// Path-position keywords (`self`, `super`, `Self`, `crate`) are emitted as
23/// plain idents (they're valid in paths) — this differs from
24/// [`make_field_ident`], which suffixes them with `_`.
25///
26/// Leading `::` (absolute path, e.g. `"::buffa::Message"`) is preserved.
27///
28/// # Panics
29///
30/// Panics (in debug) if `path` is empty.
31pub fn rust_path_to_tokens(path: &str) -> TokenStream {
32    debug_assert!(
33        !path.is_empty(),
34        "rust_path_to_tokens called with empty path"
35    );
36
37    // Handle absolute paths (starting with `::`, e.g. extern crate paths).
38    let (prefix, rest) = if let Some(stripped) = path.strip_prefix("::") {
39        (quote! { :: }, stripped)
40    } else {
41        (TokenStream::new(), path)
42    };
43
44    // For path segments, non-raw-able keywords (`self`, `super`, `Self`,
45    // `crate`) are emitted as plain idents because they are valid in path
46    // position. This differs from `make_field_ident`, which appends `_` for
47    // these keywords since they are invalid as struct field names.
48    let segments: Vec<Ident> = rest
49        .split("::")
50        .map(|seg| {
51            if is_rust_keyword(seg) && can_be_raw_ident(seg) {
52                Ident::new_raw(seg, Span::call_site())
53            } else {
54                Ident::new(seg, Span::call_site())
55            }
56        })
57        .collect();
58
59    quote! { #prefix #(#segments)::* }
60}
61
62/// Create a field identifier, escaping Rust keywords.
63///
64/// Most keywords use raw identifiers (`r#type`). The keywords `self`, `super`,
65/// `Self`, `crate` cannot be raw identifiers and are suffixed with `_` instead
66/// (e.g. `self_`), matching prost's convention.
67pub fn make_field_ident(name: &str) -> Ident {
68    if is_rust_keyword(name) {
69        if can_be_raw_ident(name) {
70            Ident::new_raw(name, Span::call_site())
71        } else {
72            format_ident!("{}_", name)
73        }
74    } else {
75        format_ident!("{}", name)
76    }
77}
78
79/// Escape a proto package segment for use as a Rust `mod` name.
80///
81/// Returns `r#` prefix for raw-able keywords, `_` suffix for path-position
82/// keywords (which can't be raw), and the name as-is otherwise.
83///
84/// This is a `String` (not `Ident`) because callers typically emit it into
85/// source text (e.g. `pub mod {name} { ... }` via `format!`), not via `quote!`.
86pub fn escape_mod_ident(name: &str) -> String {
87    if is_rust_keyword(name) {
88        if can_be_raw_ident(name) {
89            format!("r#{name}")
90        } else {
91            format!("{name}_")
92        }
93    } else {
94        name.to_string()
95    }
96}
97
/// Report whether `name` is a Rust keyword: one that is a keyword in every
/// edition, one introduced by edition 2018 or 2024, or one reserved for
/// future use.
///
/// The comparison is exact and case-sensitive (`"type"` matches, `"Type"`
/// does not). The table covers all editions up to 2024. See
/// `scripts/check-keywords.py` for the maintenance script that diffs this
/// list against the upstream rustc source.
pub fn is_rust_keyword(name: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        // Keywords in every edition.
        "as", "break", "const", "continue", "crate", "else", "enum", "extern",
        "false", "fn", "for", "if", "impl", "in", "let", "loop", "match",
        "mod", "move", "mut", "pub", "ref", "return", "self", "Self",
        "static", "struct", "super", "trait", "true", "type", "unsafe", "use",
        "where", "while",
        // Added in edition 2018.
        "async", "await", "dyn",
        // Added in edition 2024.
        "gen",
        // Reserved for future use.
        "abstract", "become", "box", "do", "final", "macro", "override",
        "priv", "try", "typeof", "unsized", "virtual", "yield",
    ];

    KEYWORDS.contains(&name)
}
162
/// Report whether `name` may be written as a raw identifier (`r#name`).
///
/// Everything is accepted except `self`, `super`, `Self` and `crate`: those
/// are valid path segments and cannot take the `r#` prefix. In field and
/// module position they are escaped with a `_` suffix instead.
fn can_be_raw_ident(name: &str) -> bool {
    const NOT_RAW_ABLE: [&str; 4] = ["self", "super", "Self", "crate"];
    !NOT_RAW_ABLE.contains(&name)
}
170
#[cfg(test)]
mod tests {
    //! Unit tests for the identifier and path helpers.
    //!
    //! Expected paths are written as `"foo :: bar"` because that is how the
    //! `TokenStream` produced by `rust_path_to_tokens` renders via
    //! `to_string()`.
    use super::*;

    #[test]
    fn rust_path_simple() {
        assert_eq!(rust_path_to_tokens("Foo").to_string(), "Foo");
    }

    #[test]
    fn rust_path_nested() {
        assert_eq!(
            rust_path_to_tokens("foo::bar::Baz").to_string(),
            "foo :: bar :: Baz"
        );
    }

    #[test]
    fn rust_path_keyword_segment() {
        // `type` is a keyword → raw identifier.
        assert_eq!(
            rust_path_to_tokens("google::type::LatLng").to_string(),
            "google :: r#type :: LatLng"
        );
    }

    #[test]
    fn rust_path_absolute() {
        assert_eq!(
            rust_path_to_tokens("::buffa::Message").to_string(),
            ":: buffa :: Message"
        );
    }

    #[test]
    fn rust_path_absolute_with_keyword_segment() {
        // The leading `::` and keyword escaping must compose.
        assert_eq!(
            rust_path_to_tokens("::google::type::LatLng").to_string(),
            ":: google :: r#type :: LatLng"
        );
    }

    #[test]
    fn rust_path_super_segment() {
        // `super` is valid in path position → plain ident (no r# or _).
        assert_eq!(
            rust_path_to_tokens("super::super::Foo").to_string(),
            "super :: super :: Foo"
        );
    }

    #[test]
    fn rust_path_other_path_keywords() {
        // The remaining path-position keywords are also emitted verbatim.
        assert_eq!(
            rust_path_to_tokens("crate::Foo").to_string(),
            "crate :: Foo"
        );
        assert_eq!(rust_path_to_tokens("self::Foo").to_string(), "self :: Foo");
        assert_eq!(rust_path_to_tokens("Self::Foo").to_string(), "Self :: Foo");
    }

    #[test]
    #[should_panic]
    fn rust_path_empty_panics() {
        // An empty path has no valid segment; the call must not return.
        let _ = rust_path_to_tokens("");
    }

    #[test]
    fn field_ident_normal() {
        assert_eq!(make_field_ident("foo").to_string(), "foo");
    }

    #[test]
    fn field_ident_keyword() {
        assert_eq!(make_field_ident("type").to_string(), "r#type");
    }

    #[test]
    fn field_ident_non_raw_keyword() {
        // `self` can't be r#self → suffixed.
        assert_eq!(make_field_ident("self").to_string(), "self_");
        assert_eq!(make_field_ident("super").to_string(), "super_");
        assert_eq!(make_field_ident("crate").to_string(), "crate_");
        assert_eq!(make_field_ident("Self").to_string(), "Self_");
    }

    #[test]
    fn escape_mod_normal() {
        assert_eq!(escape_mod_ident("foo"), "foo");
    }

    #[test]
    fn escape_mod_keyword() {
        assert_eq!(escape_mod_ident("type"), "r#type");
        assert_eq!(escape_mod_ident("async"), "r#async");
    }

    #[test]
    fn escape_mod_non_raw_keyword() {
        // Same four keywords as `field_ident_non_raw_keyword`.
        assert_eq!(escape_mod_ident("self"), "self_");
        assert_eq!(escape_mod_ident("super"), "super_");
        assert_eq!(escape_mod_ident("crate"), "crate_");
        assert_eq!(escape_mod_ident("Self"), "Self_");
    }

    #[test]
    fn keyword_coverage() {
        assert!(is_rust_keyword("type"));
        assert!(is_rust_keyword("async"));
        assert!(is_rust_keyword("gen")); // 2024
        assert!(is_rust_keyword("yield")); // reserved
        assert!(!is_rust_keyword("foo"));
        assert!(!is_rust_keyword("Type")); // case-sensitive

        // Path-position keywords are keywords too; they just can't be raw.
        for kw in ["self", "super", "Self", "crate"].iter() {
            assert!(is_rust_keyword(kw), "expected keyword: {}", kw);
        }
    }

    #[test]
    fn raw_ident_eligibility() {
        for kw in ["self", "super", "Self", "crate"].iter() {
            assert!(!can_be_raw_ident(kw), "must not be raw-able: {}", kw);
        }
        assert!(can_be_raw_ident("type"));
        assert!(can_be_raw_ident("async"));
    }
}