/// Programming languages for which identifiers can be sanitized.
///
/// The variant selects which keyword tables and which keyword-escape prefix
/// are used (see the methods on this type).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TargetLanguage {
    /// Python output.
    Python,
    /// TypeScript output.
    TypeScript,
    /// Rust output.
    Rust,
    /// Ruby output.
    Ruby,
    /// PHP output.
    Php,
}
45
46impl TargetLanguage {
47 #[must_use]
49 pub const fn reserved_keywords(self) -> &'static [&'static str] {
50 match self {
51 Self::Python => PYTHON_KEYWORDS,
52 Self::TypeScript => TYPESCRIPT_KEYWORDS,
53 Self::Rust => RUST_KEYWORDS,
54 Self::Ruby => RUBY_KEYWORDS,
55 Self::Php => PHP_KEYWORDS,
56 }
57 }
58
59 #[must_use]
61 pub const fn soft_keywords(self) -> &'static [&'static str] {
62 match self {
63 Self::Python => PYTHON_SOFT_KEYWORDS,
64 Self::TypeScript => TYPESCRIPT_SOFT_KEYWORDS,
65 Self::Rust => RUST_SOFT_KEYWORDS,
66 Self::Ruby => RUBY_SOFT_KEYWORDS,
67 Self::Php => PHP_SOFT_KEYWORDS,
68 }
69 }
70
71 #[must_use]
75 pub const fn keyword_prefix(self) -> &'static str {
76 match self {
77 Self::Rust => "r#",
78 _ => "_",
79 }
80 }
81}
82
/// Python keyword table returned by `TargetLanguage::reserved_keywords` for `Python`.
///
/// `sanitize_identifier` compares against this table case-insensitively, so the
/// capitalised entries (`False`, `None`, `True`) also match their lowercase spellings.
const PYTHON_KEYWORDS: &[&str] = &[
    "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal",
    "not", "or", "pass", "raise", "return", "try", "while", "with", "yield",
];
90
/// Python soft-keyword table returned by `TargetLanguage::soft_keywords` for `Python`.
///
/// `sanitize_identifier` escapes these exactly like the reserved table.
const PYTHON_SOFT_KEYWORDS: &[&str] = &["match", "case", "type"];
93
/// TypeScript keyword table returned by `TargetLanguage::reserved_keywords` for `TypeScript`.
///
/// Besides control-flow and declaration words the table also lists built-in type
/// names such as `any`, `boolean`, `number` and `string`. Entries are kept in
/// alphabetical order.
const TYPESCRIPT_KEYWORDS: &[&str] = &[
    "abstract",
    "any",
    "as",
    "async",
    "await",
    "boolean",
    "break",
    "case",
    "catch",
    "class",
    "const",
    "continue",
    "debugger",
    "declare",
    "default",
    "delete",
    "do",
    "else",
    "enum",
    "export",
    "extends",
    "false",
    "finally",
    "for",
    "from",
    "function",
    "get",
    "global",
    "if",
    "implements",
    "import",
    "in",
    "instanceof",
    "interface",
    "is",
    "keyof",
    "let",
    "module",
    "namespace",
    "never",
    "new",
    "null",
    "number",
    "of",
    "package",
    "private",
    "protected",
    "public",
    "readonly",
    "require",
    "return",
    "set",
    "static",
    "string",
    "super",
    "switch",
    "symbol",
    "this",
    "throw",
    "true",
    "try",
    "type",
    "typeof",
    "unique",
    "var",
    "void",
    "while",
    "with",
    "yield",
];
167
/// TypeScript soft-keyword table returned by `TargetLanguage::soft_keywords` for `TypeScript`.
///
/// `as`, `require`, `get` and `set` are also present in `TYPESCRIPT_KEYWORDS`;
/// only `accessor` is unique to this table.
const TYPESCRIPT_SOFT_KEYWORDS: &[&str] = &["as", "require", "get", "set", "accessor"];
170
/// Rust keyword table returned by `TargetLanguage::reserved_keywords` for `Rust`.
///
/// Contains both `self` and `Self`; `sanitize_identifier` lowercases its input
/// and compares case-insensitively, so both spellings match the same entry.
const RUST_KEYWORDS: &[&str] = &[
    "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern", "false", "fn",
    "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self",
    "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while",
];
178
/// Rust words that are reserved for future use, returned by
/// `TargetLanguage::soft_keywords` for `Rust`.
///
/// Despite the "soft" name these cannot be used as plain identifiers, so they
/// are escaped just like the strict keywords. The table includes `try`
/// (reserved since the 2018 edition) and `gen` (reserved since the 2024
/// edition); escaping `gen` as `r#gen` is harmless on older editions because a
/// raw identifier names the same item as its plain form.
const RUST_SOFT_KEYWORDS: &[&str] = &[
    "abstract", "become", "box", "do", "final", "gen", "macro", "override", "priv", "try", "typeof", "unsized",
    "virtual", "yield",
];
183
/// Ruby keyword table returned by `TargetLanguage::reserved_keywords` for `Ruby`.
///
/// Entries containing characters that `sanitize_identifier` rewrites or trims
/// (`defined?`, `__FILE__`, `__LINE__`, `__ENCODING__`) can never equal a
/// sanitized identifier; they are listed for completeness of the table.
const RUBY_KEYWORDS: &[&str] = &[
    "BEGIN",
    "END",
    "__ENCODING__",
    "__FILE__",
    "__LINE__",
    "alias",
    "and",
    "begin",
    "break",
    "case",
    "class",
    "def",
    "defined?",
    "do",
    "else",
    "elsif",
    "end",
    "ensure",
    "false",
    "for",
    "if",
    "in",
    "module",
    "next",
    "nil",
    "not",
    "or",
    "redo",
    "rescue",
    "retry",
    "return",
    "self",
    "super",
    "then",
    "true",
    "undef",
    "unless",
    "until",
    "when",
    "while",
    "yield",
];
229
/// Ruby soft-keyword table returned by `TargetLanguage::soft_keywords` for `Ruby`.
///
/// Intentionally empty: no additional Ruby words are escaped beyond `RUBY_KEYWORDS`.
const RUBY_SOFT_KEYWORDS: &[&str] = &[];
232
/// PHP keyword table returned by `TargetLanguage::reserved_keywords` for `Php`.
///
/// Entries are kept in alphabetical order, except that `never` is listed before `new`.
const PHP_KEYWORDS: &[&str] = &[
    "abstract",
    "and",
    "array",
    "as",
    "break",
    "callable",
    "case",
    "catch",
    "class",
    "clone",
    "const",
    "continue",
    "declare",
    "default",
    "die",
    "do",
    "echo",
    "else",
    "elseif",
    "empty",
    "enddeclare",
    "endfor",
    "endforeach",
    "endif",
    "endswitch",
    "endwhile",
    "eval",
    "exit",
    "extends",
    "false",
    "final",
    "finally",
    "fn",
    "for",
    "foreach",
    "from",
    "function",
    "global",
    "goto",
    "if",
    "implements",
    "include",
    "include_once",
    "instanceof",
    "insteadof",
    "interface",
    "isset",
    "list",
    "match",
    "namespace",
    "new",
    "never",
    "null",
    "or",
    "print",
    "private",
    "protected",
    "public",
    "readonly",
    "require",
    "require_once",
    "return",
    "static",
    "switch",
    "throw",
    "trait",
    "true",
    "try",
    "unset",
    "use",
    "var",
    "while",
    "xor",
    "yield",
];
311
/// PHP soft-keyword table returned by `TargetLanguage::soft_keywords` for `Php`.
///
/// Mostly type-like names; `static` is also present in `PHP_KEYWORDS`.
const PHP_SOFT_KEYWORDS: &[&str] = &["mixed", "object", "parent", "self", "static", "string", "void"];
314
315#[must_use]
357pub fn sanitize_identifier(name: &str, language: TargetLanguage) -> String {
358 if name.is_empty() {
359 return "field".to_string();
360 }
361
362 let mut ident: String = name
364 .chars()
365 .map(|c| if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' })
366 .collect();
367
368 while ident.contains("__") {
370 ident = ident.replace("__", "_");
371 }
372
373 ident = ident.trim_matches('_').to_string();
375
376 if ident.is_empty() {
378 return "field".to_string();
379 }
380
381 if ident.chars().next().unwrap().is_ascii_digit() {
383 ident.insert(0, '_');
384 }
385
386 let lower_ident = ident.to_lowercase();
388
389 let is_reserved = language
391 .reserved_keywords()
392 .iter()
393 .any(|kw| kw.to_lowercase() == lower_ident)
394 || language
395 .soft_keywords()
396 .iter()
397 .any(|kw| kw.to_lowercase() == lower_ident);
398
399 if is_reserved {
400 let prefix = language.keyword_prefix();
401 format!("{prefix}{lower_ident}")
402 } else {
403 lower_ident
404 }
405}
406
407#[must_use]
431pub fn sanitize_identifier_snake_case(name: &str, language: TargetLanguage) -> String {
432 let mut result = String::new();
433 let mut prev_was_upper = false;
434
435 for (i, c) in name.chars().enumerate() {
436 if c.is_uppercase() && i > 0 && !prev_was_upper {
437 result.push('_');
438 result.push(c.to_lowercase().next().unwrap());
439 prev_was_upper = true;
440 } else if c.is_uppercase() {
441 result.push(c.to_lowercase().next().unwrap());
442 prev_was_upper = true;
443 } else {
444 result.push(c);
445 prev_was_upper = false;
446 }
447 }
448
449 sanitize_identifier(&result, language)
450}
451
452#[must_use]
476pub fn sanitize_identifier_camel_case(name: &str, language: TargetLanguage) -> String {
477 let sanitized = sanitize_identifier(name, language);
478 let parts: Vec<&str> = sanitized.split('_').collect();
479
480 if parts.is_empty() {
481 return "field".to_string();
482 }
483
484 let mut result = parts[0].to_string();
485 for part in &parts[1..] {
486 if !part.is_empty() {
487 let mut chars = part.chars();
488 if let Some(first) = chars.next() {
489 result.push_str(&first.to_uppercase().to_string());
490 result.push_str(chars.as_str());
491 }
492 }
493 }
494
495 result
496}
497
498#[must_use]
522pub fn sanitize_identifier_pascal_case(name: &str, language: TargetLanguage) -> String {
523 let sanitized = sanitize_identifier(name, language);
524 let parts: Vec<&str> = sanitized.split('_').collect();
525
526 parts
527 .iter()
528 .filter(|part| !part.is_empty())
529 .map(|part| {
530 let mut chars = part.chars();
531 match chars.next() {
532 None => String::new(),
533 Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
534 }
535 })
536 .collect()
537}
538
#[cfg(test)]
mod tests {
    use super::*;

    /// Signature shared by every sanitizer entry point exercised below.
    type Sanitizer = fn(&str, TargetLanguage) -> String;

    /// Runs `sanitizer` over `(input, language, expected)` rows and asserts each result.
    fn assert_rows(sanitizer: Sanitizer, rows: &[(&str, TargetLanguage, &str)]) {
        for &(input, language, expected) in rows {
            assert_eq!(
                sanitizer(input, language),
                expected,
                "input {input:?} for {language:?}"
            );
        }
    }

    /// Asserts that each word in `keywords` is escaped as `prefix` followed by the word.
    fn assert_escaped(language: TargetLanguage, prefix: &str, keywords: &[&str]) {
        for &keyword in keywords {
            assert_eq!(sanitize_identifier(keyword, language), format!("{prefix}{keyword}"));
        }
    }

    /// Asserts that each name in `names` is returned unchanged.
    fn assert_unchanged(language: TargetLanguage, names: &[&str]) {
        for &name in names {
            assert_eq!(sanitize_identifier(name, language), name);
        }
    }

    #[test]
    fn test_python_reserved_keywords() {
        assert_escaped(
            TargetLanguage::Python,
            "_",
            &["class", "def", "import", "if", "while", "and", "or", "not", "for", "return"],
        );
    }

    #[test]
    fn test_python_soft_keywords() {
        assert_escaped(TargetLanguage::Python, "_", &["match", "case", "type"]);
    }

    #[test]
    fn test_python_non_keywords() {
        assert_unchanged(TargetLanguage::Python, &["hello", "world", "my_var"]);
    }

    #[test]
    fn test_typescript_reserved_keywords() {
        assert_escaped(
            TargetLanguage::TypeScript,
            "_",
            &["const", "let", "var", "function", "class", "interface", "type"],
        );
    }

    #[test]
    fn test_typescript_non_keywords() {
        let ts = TargetLanguage::TypeScript;
        assert_rows(sanitize_identifier, &[("name", ts, "name"), ("userId", ts, "userid")]);
        assert_rows(sanitize_identifier_camel_case, &[("user_id", ts, "userId")]);
    }

    #[test]
    fn test_rust_reserved_keywords() {
        assert_escaped(
            TargetLanguage::Rust,
            "r#",
            &["fn", "let", "mut", "struct", "enum", "impl", "trait", "async", "await"],
        );
    }

    #[test]
    fn test_rust_non_keywords() {
        assert_unchanged(TargetLanguage::Rust, &["main", "my_function"]);
    }

    #[test]
    fn test_ruby_reserved_keywords() {
        assert_escaped(
            TargetLanguage::Ruby,
            "_",
            &["def", "class", "module", "if", "unless", "case", "when", "return"],
        );
    }

    #[test]
    fn test_ruby_non_keywords() {
        assert_unchanged(TargetLanguage::Ruby, &["hello", "my_var"]);
    }

    #[test]
    fn test_php_reserved_keywords() {
        assert_escaped(
            TargetLanguage::Php,
            "_",
            &["abstract", "class", "function", "interface", "namespace", "use", "return"],
        );
    }

    #[test]
    fn test_php_non_keywords() {
        assert_unchanged(TargetLanguage::Php, &["hello", "my_class"]);
    }

    #[test]
    fn test_sanitize_invalid_characters() {
        let py = TargetLanguage::Python;
        assert_rows(
            sanitize_identifier,
            &[
                ("hello-world", py, "hello_world"),
                ("hello world", py, "hello_world"),
                ("hello@world#test", py, "hello_world_test"),
                ("hello.world", py, "hello_world"),
            ],
        );
    }

    #[test]
    fn test_sanitize_leading_digit() {
        assert_rows(
            sanitize_identifier,
            &[
                ("42answer", TargetLanguage::Python, "_42answer"),
                ("123start", TargetLanguage::TypeScript, "_123start"),
                ("1st_place", TargetLanguage::Rust, "_1st_place"),
            ],
        );
    }

    #[test]
    fn test_sanitize_multiple_underscores() {
        assert_rows(
            sanitize_identifier,
            &[
                ("hello__world", TargetLanguage::Python, "hello_world"),
                ("__double__", TargetLanguage::TypeScript, "double"),
                ("___triple___", TargetLanguage::Ruby, "triple"),
            ],
        );
    }

    #[test]
    fn test_sanitize_empty_and_invalid() {
        assert_rows(
            sanitize_identifier,
            &[
                ("", TargetLanguage::Python, "field"),
                ("---", TargetLanguage::TypeScript, "field"),
                ("___", TargetLanguage::Rust, "field"),
                ("@#$", TargetLanguage::Ruby, "field"),
            ],
        );
    }

    #[test]
    fn test_sanitize_case_insensitive_keywords() {
        assert_rows(
            sanitize_identifier,
            &[
                ("CLASS", TargetLanguage::Python, "_class"),
                ("CLASS", TargetLanguage::TypeScript, "_class"),
                ("FN", TargetLanguage::Rust, "r#fn"),
                ("DEF", TargetLanguage::Ruby, "_def"),
            ],
        );
    }

    #[test]
    fn test_sanitize_camel_case() {
        let ts = TargetLanguage::TypeScript;
        assert_rows(
            sanitize_identifier_camel_case,
            &[
                ("hello_world", ts, "helloWorld"),
                ("my_function_name", ts, "myFunctionName"),
                ("hello_world_test", ts, "helloWorldTest"),
            ],
        );
    }

    #[test]
    fn test_sanitize_pascal_case() {
        assert_rows(
            sanitize_identifier_pascal_case,
            &[
                ("hello_world", TargetLanguage::Rust, "HelloWorld"),
                ("my_class_name", TargetLanguage::TypeScript, "MyClassName"),
                ("simple", TargetLanguage::Rust, "Simple"),
            ],
        );
    }

    #[test]
    fn test_sanitize_snake_case() {
        let py = TargetLanguage::Python;
        assert_rows(
            sanitize_identifier_snake_case,
            &[
                ("HelloWorld", py, "hello_world"),
                ("MyFunctionName", py, "my_function_name"),
                ("hello", py, "hello"),
            ],
        );
    }

    #[test]
    fn test_combined_keyword_and_format() {
        // A keyword that is only part of a longer name must not be escaped.
        let camel = sanitize_identifier_camel_case("hello_class", TargetLanguage::TypeScript);
        assert_eq!(camel, "helloClass");
        assert!(!camel.starts_with('_'));

        let snake = sanitize_identifier_snake_case("HelloClass", TargetLanguage::Python);
        assert_eq!(snake, "hello_class");

        // A name that is a keyword after conversion is escaped.
        let escaped = sanitize_identifier_snake_case("Class", TargetLanguage::Python);
        assert_eq!(escaped, "_class");
    }
}