buffa_codegen/idents.rs
1//! Rust identifier and path construction helpers.
2//!
3//! These are shared between buffa's codegen and downstream code generators
4//! (e.g. `connectrpc-codegen`) that emit Rust code alongside buffa's message
5//! types and need identical keyword-escaping and path-tokenization behavior.
6//!
//! The guarantee is that if buffa generates `pub mod r#type { pub struct Foo { ... } }`,
//! downstream callers using [`rust_path_to_tokens`]`("type::Foo")` produce the
//! matching `r#type::Foo` reference.
10
11use proc_macro2::{Ident, Span, TokenStream};
12use quote::{format_ident, quote};
13
14/// Parse a `::`-separated Rust path string into a [`TokenStream`], using raw
15/// identifiers (`r#type`) for segments that are Rust keywords.
16///
17/// Used instead of `syn::parse_str::<syn::Type>` because the latter cannot
18/// handle raw identifiers in path position: `"google::type::LatLng"` would
19/// fail to parse because `type` is a keyword, but this function correctly
20/// produces `google::r#type::LatLng`.
21///
22/// Path-position keywords (`self`, `super`, `Self`, `crate`) are emitted as
23/// plain idents (they're valid in paths) — this differs from
24/// [`make_field_ident`], which suffixes them with `_`.
25///
26/// Leading `::` (absolute path, e.g. `"::buffa::Message"`) is preserved.
27///
28/// # Panics
29///
30/// Panics (in debug) if `path` is empty.
31pub fn rust_path_to_tokens(path: &str) -> TokenStream {
32 debug_assert!(
33 !path.is_empty(),
34 "rust_path_to_tokens called with empty path"
35 );
36
37 // Handle absolute paths (starting with `::`, e.g. extern crate paths).
38 let (prefix, rest) = if let Some(stripped) = path.strip_prefix("::") {
39 (quote! { :: }, stripped)
40 } else {
41 (TokenStream::new(), path)
42 };
43
44 // For path segments, non-raw-able keywords (`self`, `super`, `Self`,
45 // `crate`) are emitted as plain idents because they are valid in path
46 // position. This differs from `make_field_ident`, which appends `_` for
47 // these keywords since they are invalid as struct field names.
48 let segments: Vec<Ident> = rest
49 .split("::")
50 .map(|seg| {
51 if is_rust_keyword(seg) && can_be_raw_ident(seg) {
52 Ident::new_raw(seg, Span::call_site())
53 } else {
54 Ident::new(seg, Span::call_site())
55 }
56 })
57 .collect();
58
59 quote! { #prefix #(#segments)::* }
60}
61
62/// Create a field identifier, escaping Rust keywords.
63///
64/// Most keywords use raw identifiers (`r#type`). The keywords `self`, `super`,
65/// `Self`, `crate` cannot be raw identifiers and are suffixed with `_` instead
66/// (e.g. `self_`), matching prost's convention.
67pub fn make_field_ident(name: &str) -> Ident {
68 if is_rust_keyword(name) {
69 if can_be_raw_ident(name) {
70 Ident::new_raw(name, Span::call_site())
71 } else {
72 format_ident!("{}_", name)
73 }
74 } else {
75 format_ident!("{}", name)
76 }
77}
78
/// Convert a protobuf enum value name to `UpperCamelCase`.
///
/// A new word starts:
///
/// - at the beginning of the input and after any underscore (underscores
///   themselves are dropped),
/// - at a lower→upper transition (`myValue` → `MyValue`),
/// - at the last capital of an acronym that is followed by a lowercase
///   letter (`HTTPServer` → `HttpServer`).
///
/// The first character of each word is upper-cased and the remaining ones are
/// lower-cased, so the canonical `SHOUTY_SNAKE_CASE` form works as well
/// (`RULE_LEVEL_HIGH` → `RuleLevelHigh`).
///
/// The mapping is deliberately lossy: `FOO_BAR` and `FOO__BAR` both give
/// `FooBar`, and `HTTPServer` and `HTTP_SERVER` both give `HttpServer`.
/// Detecting such collisions is the caller's responsibility.
///
/// Digits are copied through unchanged. A leading digit in the output is only
/// reachable when the caller stripped a prefix beforehand (e.g. `VERSION_2` →
/// `2`); callers that need a valid Rust identifier must check for it.
#[must_use]
pub fn to_upper_camel_case(s: &str) -> String {
    let mut result = String::new();
    let mut rest = s.chars().peekable();
    // Previous character of the word being built; `None` means the next
    // non-underscore character opens a new word.
    let mut prev_in_word: Option<char> = None;

    while let Some(current) = rest.next() {
        if current == '_' {
            prev_in_word = None;
            continue;
        }

        let begins_word = match prev_in_word {
            None => true,
            Some(before) => {
                // `fooBar`: the capital after a lowercase letter opens a word.
                let camel_hump = before.is_lowercase() && current.is_uppercase();
                // `HTTPServer`: the capital right before a lowercase letter
                // belongs to the next word, not to the acronym.
                let acronym_tail = before.is_uppercase()
                    && current.is_uppercase()
                    && rest.peek().is_some_and(|after| after.is_lowercase());
                camel_hump || acronym_tail
            }
        };

        if begins_word {
            result.extend(current.to_uppercase());
        } else {
            result.extend(current.to_lowercase());
        }
        prev_in_word = Some(current);
    }

    result
}
126
/// Convert a type name to `SHOUTY_SNAKE_CASE`.
///
/// This rebuilds the conventional enum-value prefix from an enum's proto name
/// so that the prefix can be stripped: `RuleLevel` → `RULE_LEVEL`, after which
/// a value such as `RULE_LEVEL_HIGH` loses its `RULE_LEVEL_` prefix.
///
/// An underscore is inserted before an uppercase character when the previous
/// character is
///
/// - lowercase (`RuleLevel` → `RULE_LEVEL`),
/// - an ASCII digit (`V2Beta` → `V2_BETA`), or
/// - uppercase while the next character is lowercase, i.e. at an
///   acronym→word boundary (`HTTPServer` → `HTTP_SERVER`).
///
/// Underscores already present in the input are kept and never doubled.
#[must_use]
pub fn to_shouty_snake_case(s: &str) -> String {
    let mut result = String::new();
    let mut rest = s.chars().peekable();
    let mut previous: Option<char> = None;

    while let Some(current) = rest.next() {
        // No separator at the very start or right after an existing `_`.
        let split_here = current.is_uppercase()
            && match previous {
                None | Some('_') => false,
                Some(before) => {
                    before.is_lowercase()
                        || before.is_ascii_digit()
                        || (before.is_uppercase()
                            && rest.peek().is_some_and(|after| after.is_lowercase()))
                }
            };
        if split_here {
            result.push('_');
        }

        // Underscores need no special case: `_` is never uppercase, so it
        // cannot trigger a split, and upper-casing `_` yields `_` again.
        result.extend(current.to_uppercase());
        previous = Some(current);
    }

    result
}
157
158/// Escape a proto package segment for use as a Rust `mod` name.
159///
160/// Returns `r#` prefix for raw-able keywords, `_` suffix for path-position
161/// keywords (which can't be raw), and the name as-is otherwise.
162///
163/// This is a `String` (not `Ident`) because callers typically emit it into
164/// source text (e.g. `pub mod {name} { ... }` via `format!`), not via `quote!`.
165pub fn escape_mod_ident(name: &str) -> String {
166 if is_rust_keyword(name) {
167 if can_be_raw_ident(name) {
168 format!("r#{name}")
169 } else {
170 format!("{name}_")
171 }
172 } else {
173 name.to_string()
174 }
175}
176
/// Report whether `name` is a Rust keyword: strict or reserved, in any edition
/// up to and including 2024.
///
/// The match is case-sensitive and edition-independent: a keyword introduced
/// in a later edition is reported as a keyword regardless of the edition the
/// generated code targets. Weak (contextual) keywords such as `union` are not
/// part of the list.
///
/// See `scripts/check-keywords.py` for the maintenance script that diffs this
/// list against the upstream rustc source.
pub fn is_rust_keyword(name: &str) -> bool {
    matches!(
        name,
        // Strict keywords — all editions
        "as" | "break"
            | "const"
            | "continue"
            | "crate"
            | "else"
            | "enum"
            | "extern"
            | "false"
            | "fn"
            | "for"
            | "if"
            | "impl"
            | "in"
            | "let"
            | "loop"
            | "match"
            | "mod"
            | "move"
            | "mut"
            | "pub"
            | "ref"
            | "return"
            | "self"
            | "Self"
            | "static"
            | "struct"
            | "super"
            | "trait"
            | "true"
            | "type"
            | "unsafe"
            | "use"
            | "where"
            | "while"
            // Strict keywords — edition 2018+
            | "async"
            | "await"
            | "dyn"
            // Reserved for future use — all editions
            | "abstract"
            | "become"
            | "box"
            | "do"
            | "final"
            | "macro"
            | "override"
            | "priv"
            | "typeof"
            | "unsized"
            | "virtual"
            | "yield"
            // Reserved for future use — edition 2018+
            | "try"
            // Reserved for future use — edition 2024+
            | "gen"
    )
}
241
/// Report whether `name` may be written as a raw identifier (`r#name`).
///
/// The path-segment keywords `self`, `super`, `Self` and `crate` cannot take
/// the `r#` prefix; every other name is accepted here. In field/mod position
/// those four get a `_` suffix instead.
fn can_be_raw_ident(name: &str) -> bool {
    const NEVER_RAW: [&str; 4] = ["self", "super", "Self", "crate"];
    !NEVER_RAW.contains(&name)
}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Render `path` through `rust_path_to_tokens` as a string for comparison.
    fn path_tokens(path: &str) -> String {
        rust_path_to_tokens(path).to_string()
    }

    #[test]
    fn rust_path_simple() {
        assert_eq!(path_tokens("Foo"), "Foo");
    }

    #[test]
    fn rust_path_nested() {
        assert_eq!(path_tokens("foo::bar::Baz"), "foo :: bar :: Baz");
    }

    #[test]
    fn rust_path_keyword_segment() {
        // The keyword segment `type` must come out as a raw identifier.
        assert_eq!(
            path_tokens("google::type::LatLng"),
            "google :: r#type :: LatLng"
        );
    }

    #[test]
    fn rust_path_absolute() {
        assert_eq!(path_tokens("::buffa::Message"), ":: buffa :: Message");
    }

    #[test]
    fn rust_path_super_segment() {
        // `super` is legal in path position, so it is neither `r#`-prefixed
        // nor `_`-suffixed.
        assert_eq!(path_tokens("super::super::Foo"), "super :: super :: Foo");
    }

    #[test]
    fn field_ident_normal() {
        assert_eq!(make_field_ident("foo").to_string(), "foo");
    }

    #[test]
    fn field_ident_keyword() {
        assert_eq!(make_field_ident("type").to_string(), "r#type");
    }

    #[test]
    fn field_ident_non_raw_keyword() {
        // These keywords have no raw form (`r#self` is invalid), so they are
        // suffixed instead.
        let cases = [
            ("self", "self_"),
            ("super", "super_"),
            ("crate", "crate_"),
            ("Self", "Self_"),
        ];
        for (input, expected) in cases {
            assert_eq!(
                make_field_ident(input).to_string(),
                expected,
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn escape_mod_normal() {
        assert_eq!(escape_mod_ident("foo"), "foo");
    }

    #[test]
    fn escape_mod_keyword() {
        for (input, expected) in [("type", "r#type"), ("async", "r#async")] {
            assert_eq!(escape_mod_ident(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn escape_mod_non_raw_keyword() {
        for (input, expected) in [("self", "self_"), ("super", "super_")] {
            assert_eq!(escape_mod_ident(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn upper_camel_basic() {
        let cases = [
            ("RULE_LEVEL_HIGH", "RuleLevelHigh"),
            ("UNKNOWN", "Unknown"),
            ("low_priority", "LowPriority"),
            ("HTTP_SERVER", "HttpServer"),
        ];
        for (input, expected) in cases {
            assert_eq!(to_upper_camel_case(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn upper_camel_lossy_collisions() {
        // Each pair collapses to a single identifier; the caller is expected
        // to detect the collision. First pair: single vs doubled underscore.
        // Second pair: acronym spelling vs snake spelling.
        let cases = [
            ("FOO_BAR", "FooBar"),
            ("FOO__BAR", "FooBar"),
            ("HTTPServer", "HttpServer"),
            ("HTTP_SERVER", "HttpServer"),
        ];
        for (input, expected) in cases {
            assert_eq!(to_upper_camel_case(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn upper_camel_mixed_case_input() {
        // Case transitions count as word boundaries, so a value that is
        // already CamelCase round-trips (and is later skipped as a redundant
        // alias).
        let cases = [
            ("MyValue", "MyValue"),
            ("fooBar", "FooBar"),
            ("Active", "Active"),
        ];
        for (input, expected) in cases {
            assert_eq!(to_upper_camel_case(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn upper_camel_digit_and_empty() {
        // A bare digit or empty string is reachable only after a prefix
        // strip; both are preserved verbatim for the caller's validity check.
        for (input, expected) in [("2", "2"), ("", ""), ("FOO_2", "Foo2")] {
            assert_eq!(to_upper_camel_case(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn upper_camel_keyword_source() {
        // `SELF` folds to the keyword `Self`; escaping the identifier is the
        // caller's job (via `make_field_ident`).
        assert_eq!(to_upper_camel_case("SELF"), "Self");
    }

    #[test]
    fn shouty_snake_basic() {
        let cases = [
            ("RuleLevel", "RULE_LEVEL"),
            ("NullValue", "NULL_VALUE"),
            ("Type", "TYPE"),
        ];
        for (input, expected) in cases {
            assert_eq!(to_shouty_snake_case(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn shouty_snake_acronym() {
        assert_eq!(to_shouty_snake_case("HTTPServer"), "HTTP_SERVER");
    }

    #[test]
    fn shouty_snake_already_snakey() {
        // A name that already carries underscores comes back unchanged.
        assert_eq!(to_shouty_snake_case("RULE_LEVEL"), "RULE_LEVEL");
    }

    #[test]
    fn keyword_coverage() {
        // `gen` is the edition-2024 addition; `yield` is a reserved keyword.
        for keyword in ["type", "async", "gen", "yield"] {
            assert!(is_rust_keyword(keyword), "input: {keyword:?}");
        }
        // The lookup is case-sensitive, so `Type` is not a keyword.
        for plain in ["foo", "Type"] {
            assert!(!is_rust_keyword(plain), "input: {plain:?}");
        }
    }
}