Skip to main content

srcmap_sourcemap/
js_identifiers.rs

//! JavaScript identifier validation utilities.
//!
//! Provides functions to check whether a string is a valid JavaScript
//! identifier and to extract the first identifier token from a source line.
5
6/// Check if a string is a valid JavaScript identifier.
7///
8/// Follows the ECMAScript specification for `IdentifierName`:
9/// - First character: `$`, `_`, ASCII letter, or Unicode ID_Start
10/// - Subsequent characters: `$`, `_`, ASCII alphanumeric, `\u{200c}` (ZWNJ),
11///   `\u{200d}` (ZWJ), or Unicode ID_Continue
12pub fn is_valid_javascript_identifier(s: &str) -> bool {
13    let mut chars = s.chars();
14    let Some(first) = chars.next() else {
15        return false;
16    };
17    if !is_id_start(first) {
18        return false;
19    }
20    chars.all(is_id_continue)
21}
22
23/// Extract the first valid JavaScript identifier from a source line.
24///
25/// Skips leading whitespace, then collects characters that form a valid
26/// identifier. Returns `None` if no identifier is found.
27pub fn get_javascript_token(source_line: &str) -> Option<&str> {
28    let trimmed = source_line.trim_start();
29    if trimmed.is_empty() {
30        return None;
31    }
32
33    let mut chars = trimmed.chars();
34    let first = chars.next()?;
35    if !is_id_start(first) {
36        return None;
37    }
38
39    let start = source_line.len() - trimmed.len();
40    let mut end = start + first.len_utf8();
41
42    for ch in chars {
43        if !is_id_continue(ch) {
44            break;
45        }
46        end += ch.len_utf8();
47    }
48
49    let token = &source_line[start..end];
50    if token.is_empty() { None } else { Some(token) }
51}
52
/// Reports whether `c` may appear as the first character of an identifier.
///
/// Accepted: `$`, `_`, ASCII letters, and any non-ASCII character for which
/// [`char::is_alphabetic`] holds (the stand-in used here for Unicode
/// `ID_Start`).
fn is_id_start(c: char) -> bool {
    match c {
        '$' | '_' | 'a'..='z' | 'A'..='Z' => true,
        // Outside ASCII, defer to the standard library's alphabetic test.
        _ => !c.is_ascii() && c.is_alphabetic(),
    }
}
57
/// Reports whether `c` may appear after the first character of an identifier.
///
/// Accepted: `$`, `_`, ZWNJ (`\u{200c}`), ZWJ (`\u{200d}`), ASCII letters and
/// digits, and any non-ASCII character for which [`char::is_alphanumeric`]
/// holds (the stand-in used here for Unicode `ID_Continue`).
fn is_id_continue(c: char) -> bool {
    match c {
        '$' | '_' | '\u{200c}' | '\u{200d}' => true,
        '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
        // Outside ASCII, defer to the standard library's alphanumeric test.
        _ => !c.is_ascii() && c.is_alphanumeric(),
    }
}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every string in `cases` is accepted as an identifier.
    fn assert_all_valid(cases: &[&str]) {
        for &case in cases {
            assert!(
                is_valid_javascript_identifier(case),
                "expected {:?} to be a valid identifier",
                case
            );
        }
    }

    /// Asserts that every string in `cases` is rejected as an identifier.
    fn assert_all_invalid(cases: &[&str]) {
        for &case in cases {
            assert!(
                !is_valid_javascript_identifier(case),
                "expected {:?} to be an invalid identifier",
                case
            );
        }
    }

    /// Asserts that each input line yields the paired expected token.
    fn assert_tokens(cases: &[(&str, Option<&str>)]) {
        for &(line, expected) in cases {
            assert_eq!(get_javascript_token(line), expected, "input: {:?}", line);
        }
    }

    #[test]
    fn test_valid_identifiers() {
        assert_all_valid(&[
            "foo",
            "_bar",
            "$baz",
            "camelCase",
            "PascalCase",
            "snake_case",
            "x",
            "_",
            "$",
            "_$mixed123",
            "a1",
        ]);
    }

    #[test]
    fn test_invalid_identifiers() {
        assert_all_invalid(&["", "123", "1abc", "-foo", "foo bar", "foo-bar", ".foo"]);
    }

    #[test]
    fn test_unicode_identifiers() {
        assert_all_valid(&[
            // CJK characters
            "\u{4e16}\u{754c}",
            // Accented characters
            "\u{00e9}l\u{00e8}ve",
            // Cyrillic
            "\u{0442}\u{0435}\u{0441}\u{0442}",
        ]);
    }

    #[test]
    fn test_zwnj_zwj() {
        // ZWNJ and ZWJ may continue an identifier...
        assert_all_valid(&["a\u{200c}b", "a\u{200d}b"]);
        // ...but may not start one.
        assert_all_invalid(&["\u{200c}abc", "\u{200d}abc"]);
    }

    #[test]
    fn test_get_javascript_token_basic() {
        assert_tokens(&[
            ("hello world", Some("hello")),
            ("  foo(bar)", Some("foo")),
            ("  _private", Some("_private")),
            ("  $jquery", Some("$jquery")),
        ]);
    }

    #[test]
    fn test_get_javascript_token_whitespace() {
        assert_tokens(&[
            ("   abc123", Some("abc123")),
            ("\t\ttab", Some("tab")),
            ("noSpace", Some("noSpace")),
        ]);
    }

    #[test]
    fn test_get_javascript_token_none() {
        assert_tokens(&[
            ("", None),
            ("   ", None),
            ("  123abc", None),
            ("  .foo", None),
            ("  (bar)", None),
        ]);
    }

    #[test]
    fn test_get_javascript_token_stops_at_non_ident() {
        assert_tokens(&[
            ("foo.bar", Some("foo")),
            ("func(", Some("func")),
            ("x = 5", Some("x")),
            ("arr[0]", Some("arr")),
        ]);
    }

    #[test]
    fn test_get_javascript_token_unicode() {
        assert_tokens(&[("  \u{4e16}\u{754c}!", Some("\u{4e16}\u{754c}"))]);
    }
}