perl_module_token_core/
lib.rs1#![deny(unsafe_code)]
10#![warn(rust_2018_idioms)]
11#![warn(missing_docs)]
12#![warn(clippy::all)]
13
/// Half-open byte range `start..end` covering a module token inside the text it was
/// parsed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ModuleTokenSpan {
    /// Byte offset of the first byte of the token.
    pub start: usize,
    /// Byte offset one past the last byte of the token (exclusive).
    pub end: usize,
}
22
23#[must_use]
28pub fn parse_module_token(text: &str, start: usize) -> Option<ModuleTokenSpan> {
29 let bytes = text.as_bytes();
30 if start >= bytes.len() || !is_identifier_start(bytes[start]) {
31 return None;
32 }
33
34 let token_start = start;
35 let mut index = parse_identifier_segment(bytes, start)?;
36
37 while let Some(next) = next_separator(bytes, index) {
38 index = match next {
39 Separator::Canonical => index + 2,
40 Separator::Legacy => index + 1,
41 };
42
43 index = parse_identifier_segment(bytes, index)?;
44 }
45
46 Some(ModuleTokenSpan { start: token_start, end: index })
47}
48
49#[must_use]
51pub fn has_standalone_module_token_boundaries(line: &str, start: usize, end: usize) -> bool {
52 let left_ok = !left_context_is_module_char(line, start);
53 let right_ok = !right_context_is_module_char(line, end);
54
55 left_ok && right_ok
56}
57
/// Returns `true` for characters that may appear inside a module token written with the
/// `::` separator: ASCII letters, ASCII digits, `_`, and `:`.
///
/// The legacy `'` separator is deliberately not part of this class.
#[must_use]
pub fn is_module_token_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | ':')
}
63
/// Returns `true` for characters that may appear inside a single identifier segment of
/// a module token: ASCII letters, ASCII digits, and `_`.
///
/// Separator characters (`:` and `'`) are not part of this class.
#[must_use]
pub fn is_module_identifier_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
69
/// The two spellings of the separator between identifier segments of a module token.
#[derive(Debug, Clone, Copy)]
enum Separator {
    /// The two-byte `::` separator.
    Canonical,
    /// The one-byte `'` separator.
    Legacy,
}
75
76fn next_separator(bytes: &[u8], index: usize) -> Option<Separator> {
77 if text_starts_with(bytes, index, "::") {
78 return Some(Separator::Canonical);
79 }
80
81 if index < bytes.len() && bytes[index] == b'\'' {
82 return Some(Separator::Legacy);
83 }
84
85 None
86}
87
88fn parse_identifier_segment(bytes: &[u8], start: usize) -> Option<usize> {
89 if start >= bytes.len() || !is_identifier_start(bytes[start]) {
90 return None;
91 }
92
93 let mut index = start + 1;
94 while index < bytes.len() && is_identifier_byte(bytes[index]) {
95 index += 1;
96 }
97
98 Some(index)
99}
100
/// Returns `true` when `byte` may open an identifier segment: an ASCII letter or `_`.
fn is_identifier_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
104
/// Returns `true` when `byte` may continue an identifier segment: an ASCII letter, an
/// ASCII digit, or `_`.
fn is_identifier_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
108
109fn left_context_is_module_char(line: &str, start: usize) -> bool {
110 if start == 0 {
111 return false;
112 }
113
114 let mut left = line[..start].char_indices();
115 let Some((left_idx, ch)) = left.next_back() else {
116 return false;
117 };
118
119 if ch != '\'' {
120 return is_module_token_char(ch);
121 }
122
123 if left_idx == 0 {
124 return false;
125 }
126
127 line[..left_idx].chars().next_back().is_some_and(is_module_identifier_char)
128}
129
130fn right_context_is_module_char(line: &str, end: usize) -> bool {
131 if end >= line.len() {
132 return false;
133 }
134
135 let mut right = line[end..].chars();
136 let Some(ch) = right.next() else {
137 return false;
138 };
139
140 if ch != '\'' {
141 return is_module_token_char(ch);
142 }
143
144 right.next().is_some_and(is_module_identifier_char)
145}
146
/// Reports whether `bytes`, viewed from byte offset `start`, begins with the bytes of
/// `needle`.
///
/// Returns `false` when `start` is past the end of `bytes` or when fewer than
/// `needle.len()` bytes remain. An empty `needle` matches at any in-range offset,
/// including `bytes.len()`.
fn text_starts_with(bytes: &[u8], start: usize, needle: &str) -> bool {
    // Slicing via `get` avoids computing `start + needle.len()`, which could overflow
    // for very large `start`, and leaves the length comparison to `starts_with`.
    bytes.get(start..).is_some_and(|rest| rest.starts_with(needle.as_bytes()))
}
156
#[cfg(test)]
mod tests {
    use super::{
        ModuleTokenSpan, has_standalone_module_token_boundaries, is_module_identifier_char,
        is_module_token_char, parse_module_token,
    };

    #[test]
    fn parses_canonical_and_legacy_tokens() {
        assert_eq!(
            parse_module_token("use Foo::Bar;", 4),
            Some(ModuleTokenSpan { start: 4, end: 12 })
        );
        assert_eq!(
            parse_module_token("use Foo'Bar;", 4),
            Some(ModuleTokenSpan { start: 4, end: 11 })
        );
    }

    #[test]
    fn parses_mixed_separators_and_stops_at_first_non_token_byte() {
        assert_eq!(
            parse_module_token("Foo::Bar'Baz::Qux rest", 0),
            Some(ModuleTokenSpan { start: 0, end: 17 })
        );
        assert_eq!(parse_module_token("_private9 ", 0), Some(ModuleTokenSpan { start: 0, end: 9 }));
        // A single colon is not a separator, so the token ends before it.
        assert_eq!(parse_module_token("Foo:Bar", 0), Some(ModuleTokenSpan { start: 0, end: 3 }));
    }

    #[test]
    fn rejects_invalid_starts() {
        assert_eq!(parse_module_token("use 9Foo;", 4), None);
        assert_eq!(parse_module_token("use Foo;", 3), None);
        assert_eq!(parse_module_token("Foo", 3), None);
        assert_eq!(parse_module_token("", 0), None);
    }

    #[test]
    fn rejects_dangling_separators() {
        // A separator that is not followed by an identifier segment rejects the whole
        // token instead of truncating it.
        assert_eq!(parse_module_token("Foo::;", 0), None);
        assert_eq!(parse_module_token("Foo::9", 0), None);
        assert_eq!(parse_module_token("Foo';", 0), None);
    }

    #[test]
    fn detects_standalone_token_boundaries() {
        assert!(has_standalone_module_token_boundaries("use Foo::Bar;", 4, 12));
        assert!(!has_standalone_module_token_boundaries("use Foo::Bar::Extra;", 4, 12));
        // Token touching both ends of the line.
        assert!(has_standalone_module_token_boundaries("Foo::Bar", 0, 8));
        // Identifier character directly to the left.
        assert!(!has_standalone_module_token_boundaries("xFoo::Bar", 1, 9));
    }

    #[test]
    fn treats_quote_as_separator_only_next_to_identifier() {
        // `'` followed / preceded by an identifier character continues the token.
        assert!(!has_standalone_module_token_boundaries("use Foo'Bar;", 4, 7));
        assert!(!has_standalone_module_token_boundaries("A'Foo;", 2, 5));
        // Quotes acting as string delimiters do not.
        assert!(has_standalone_module_token_boundaries("print 'Foo';", 7, 10));
    }

    #[test]
    fn exports_token_character_classes() {
        assert!(is_module_token_char(':'));
        assert!(is_module_token_char('_'));
        assert!(!is_module_token_char('\''));
        assert!(!is_module_token_char(' '));
        assert!(is_module_identifier_char('_'));
        assert!(!is_module_identifier_char(':'));
        assert!(!is_module_identifier_char('\''));
    }
}