buffa_codegen/idents.rs
1//! Rust identifier and path construction helpers.
2//!
3//! These are shared between buffa's codegen and downstream code generators
4//! (e.g. `connectrpc-codegen`) that emit Rust code alongside buffa's message
5//! types and need identical keyword-escaping and path-tokenization behavior.
6//!
//! The guarantee is that if buffa generates `pub struct Foo { ... }` inside a
//! module emitted as `r#type`, downstream callers using
//! [`rust_path_to_tokens`]`("type::Foo")` produce the matching `r#type::Foo`
//! reference.
10
11use proc_macro2::{Ident, Span, TokenStream};
12use quote::{format_ident, quote};
13
14/// Parse a `::`-separated Rust path string into a [`TokenStream`], using raw
15/// identifiers (`r#type`) for segments that are Rust keywords.
16///
17/// Used instead of `syn::parse_str::<syn::Type>` because the latter cannot
18/// handle raw identifiers in path position: `"google::type::LatLng"` would
19/// fail to parse because `type` is a keyword, but this function correctly
20/// produces `google::r#type::LatLng`.
21///
22/// Path-position keywords (`self`, `super`, `Self`, `crate`) are emitted as
23/// plain idents (they're valid in paths) — this differs from
24/// [`make_field_ident`], which suffixes them with `_`.
25///
26/// Leading `::` (absolute path, e.g. `"::buffa::Message"`) is preserved.
27///
28/// # Panics
29///
30/// Panics (in debug) if `path` is empty.
31pub fn rust_path_to_tokens(path: &str) -> TokenStream {
32 debug_assert!(
33 !path.is_empty(),
34 "rust_path_to_tokens called with empty path"
35 );
36
37 // Handle absolute paths (starting with `::`, e.g. extern crate paths).
38 let (prefix, rest) = if let Some(stripped) = path.strip_prefix("::") {
39 (quote! { :: }, stripped)
40 } else {
41 (TokenStream::new(), path)
42 };
43
44 // For path segments, non-raw-able keywords (`self`, `super`, `Self`,
45 // `crate`) are emitted as plain idents because they are valid in path
46 // position. This differs from `make_field_ident`, which appends `_` for
47 // these keywords since they are invalid as struct field names.
48 let segments: Vec<Ident> = rest
49 .split("::")
50 .map(|seg| {
51 if is_rust_keyword(seg) && can_be_raw_ident(seg) {
52 Ident::new_raw(seg, Span::call_site())
53 } else {
54 Ident::new(seg, Span::call_site())
55 }
56 })
57 .collect();
58
59 quote! { #prefix #(#segments)::* }
60}
61
62/// Create a field identifier, escaping Rust keywords.
63///
64/// Most keywords use raw identifiers (`r#type`). The keywords `self`, `super`,
65/// `Self`, `crate` cannot be raw identifiers and are suffixed with `_` instead
66/// (e.g. `self_`), matching prost's convention.
67pub fn make_field_ident(name: &str) -> Ident {
68 if is_rust_keyword(name) {
69 if can_be_raw_ident(name) {
70 Ident::new_raw(name, Span::call_site())
71 } else {
72 format_ident!("{}_", name)
73 }
74 } else {
75 format_ident!("{}", name)
76 }
77}
78
79/// Escape a proto package segment for use as a Rust `mod` name.
80///
81/// Returns `r#` prefix for raw-able keywords, `_` suffix for path-position
82/// keywords (which can't be raw), and the name as-is otherwise.
83///
84/// This is a `String` (not `Ident`) because callers typically emit it into
85/// source text (e.g. `pub mod {name} { ... }` via `format!`), not via `quote!`.
86pub fn escape_mod_ident(name: &str) -> String {
87 if is_rust_keyword(name) {
88 if can_be_raw_ident(name) {
89 format!("r#{name}")
90 } else {
91 format!("{name}_")
92 }
93 } else {
94 name.to_string()
95 }
96}
97
/// Report whether `name` is a Rust keyword in any edition up to 2024.
///
/// Both keywords in active use and words reserved for future use count.
/// The comparison is exact and case-sensitive (`"type"` matches, `"Type"`
/// does not).
///
/// See `scripts/check-keywords.py` for the maintenance script that diffs this
/// list against the upstream rustc source.
pub fn is_rust_keyword(name: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        // Strict keywords in every edition.
        "as", "break", "const", "continue", "crate", "else", "enum", "extern",
        "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod",
        "move", "mut", "pub", "ref", "return", "self", "Self", "static",
        "struct", "super", "trait", "true", "type", "unsafe", "use", "where",
        "while",
        // Strict keywords from the 2018 edition onwards.
        "async", "await", "dyn",
        // Keyword from the 2024 edition onwards.
        "gen",
        // Reserved for future use.
        "abstract", "become", "box", "do", "final", "macro", "override", "priv",
        "try", "typeof", "unsized", "virtual", "yield",
    ];

    KEYWORDS.contains(&name)
}
162
/// Report whether `name` may be written as a raw identifier (`r#name`).
///
/// Only `self`, `super`, `Self` and `crate` are rejected: they are valid path
/// segments and cannot take the `r#` prefix. In field/mod position they get a
/// `_` suffix instead. Every other string yields `true`; this function does
/// not check that `name` is otherwise a valid identifier.
fn can_be_raw_ident(name: &str) -> bool {
    match name {
        "self" | "super" | "Self" | "crate" => false,
        _ => true,
    }
}
170
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `path` through `rust_path_to_tokens` and stringify the tokens.
    fn rendered_path(path: &str) -> String {
        rust_path_to_tokens(path).to_string()
    }

    /// Stringify the identifier `make_field_ident` produces for `name`.
    fn rendered_field(name: &str) -> String {
        make_field_ident(name).to_string()
    }

    // --- rust_path_to_tokens -------------------------------------------

    #[test]
    fn rust_path_simple() {
        assert_eq!(rendered_path("Foo"), "Foo");
    }

    #[test]
    fn rust_path_nested() {
        assert_eq!(rendered_path("foo::bar::Baz"), "foo :: bar :: Baz");
    }

    #[test]
    fn rust_path_keyword_segment() {
        // The `type` segment is a keyword, so it must come out as `r#type`.
        assert_eq!(
            rendered_path("google::type::LatLng"),
            "google :: r#type :: LatLng"
        );
    }

    #[test]
    fn rust_path_absolute() {
        assert_eq!(rendered_path("::buffa::Message"), ":: buffa :: Message");
    }

    #[test]
    fn rust_path_super_segment() {
        // `super` is legal inside a path: no `r#` prefix, no `_` suffix.
        assert_eq!(rendered_path("super::super::Foo"), "super :: super :: Foo");
    }

    // --- make_field_ident ----------------------------------------------

    #[test]
    fn field_ident_normal() {
        assert_eq!(rendered_field("foo"), "foo");
    }

    #[test]
    fn field_ident_keyword() {
        assert_eq!(rendered_field("type"), "r#type");
    }

    #[test]
    fn field_ident_non_raw_keyword() {
        // None of these can be written as `r#…`, so each gets a `_` suffix.
        let cases = [
            ("self", "self_"),
            ("super", "super_"),
            ("crate", "crate_"),
            ("Self", "Self_"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(rendered_field(input), expected);
        }
    }

    // --- escape_mod_ident ----------------------------------------------

    #[test]
    fn escape_mod_normal() {
        assert_eq!(escape_mod_ident("foo"), "foo");
    }

    #[test]
    fn escape_mod_keyword() {
        let cases = [("type", "r#type"), ("async", "r#async")];
        for &(input, expected) in cases.iter() {
            assert_eq!(escape_mod_ident(input), expected);
        }
    }

    #[test]
    fn escape_mod_non_raw_keyword() {
        let cases = [("self", "self_"), ("super", "super_")];
        for &(input, expected) in cases.iter() {
            assert_eq!(escape_mod_ident(input), expected);
        }
    }

    // --- is_rust_keyword -----------------------------------------------

    #[test]
    fn keyword_coverage() {
        // One sample per group: all-edition, 2018, 2024, reserved.
        for kw in ["type", "async", "gen", "yield"].iter() {
            assert!(is_rust_keyword(kw));
        }
        // An ordinary word, and a case variant of a keyword (matching is
        // case-sensitive).
        for word in ["foo", "Type"].iter() {
            assert!(!is_rust_keyword(word));
        }
    }
}