// perl_module/token_core/mod.rs

/// Byte span of a module token located inside a larger piece of text.
///
/// Both offsets are byte indices into the text that was scanned. `end` is
/// exclusive, so the token itself is `text[start..end]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ModuleTokenSpan {
    /// Byte offset of the first byte of the token.
    pub start: usize,
    /// Byte offset one past the last byte of the token.
    pub end: usize,
}

12#[must_use]
17pub fn parse_module_token(text: &str, start: usize) -> Option<ModuleTokenSpan> {
18 let bytes = text.as_bytes();
19 if start >= bytes.len() || !is_identifier_start(bytes[start]) {
20 return None;
21 }
22
23 let token_start = start;
24 let mut index = parse_identifier_segment(bytes, start)?;
25
26 while let Some(next) = next_separator(bytes, index) {
27 index = match next {
28 Separator::Canonical => index + 2,
29 Separator::Legacy => index + 1,
30 };
31
32 index = parse_identifier_segment(bytes, index)?;
33 }
34
35 Some(ModuleTokenSpan { start: token_start, end: index })
36}
37
38#[must_use]
40pub fn has_standalone_module_token_boundaries(line: &str, start: usize, end: usize) -> bool {
41 let left_ok = !left_context_is_module_char(line, start);
42 let right_ok = !right_context_is_module_char(line, end);
43
44 left_ok && right_ok
45}
46
/// Returns `true` for characters that may appear inside a module token:
/// ASCII letters, ASCII digits, `_` and `:`.
///
/// Unlike [`is_module_identifier_char`], this also accepts `:` so that the
/// pieces of a `::` separator count as token text.
#[must_use]
pub fn is_module_token_char(ch: char) -> bool {
    matches!(ch, '_' | ':') || ch.is_ascii_alphanumeric()
}

/// Returns `true` for characters that may appear inside a single identifier
/// segment of a module token: ASCII letters, ASCII digits and `_`.
///
/// Separator characters (`:` and `'`) are not accepted.
#[must_use]
pub fn is_module_identifier_char(ch: char) -> bool {
    ch == '_' || ch.is_ascii_alphanumeric()
}

/// The kind of separator found between two identifier segments of a module token.
#[derive(Debug, Clone, Copy)]
enum Separator {
    /// The two-byte `::` separator.
    Canonical,
    /// The one-byte `'` separator.
    Legacy,
}

65fn next_separator(bytes: &[u8], index: usize) -> Option<Separator> {
66 if text_starts_with(bytes, index, "::") {
67 return Some(Separator::Canonical);
68 }
69
70 if index < bytes.len() && bytes[index] == b'\'' {
71 return Some(Separator::Legacy);
72 }
73
74 None
75}
76
77fn parse_identifier_segment(bytes: &[u8], start: usize) -> Option<usize> {
78 if start >= bytes.len() || !is_identifier_start(bytes[start]) {
79 return None;
80 }
81
82 let mut index = start + 1;
83 while index < bytes.len() && is_identifier_byte(bytes[index]) {
84 index += 1;
85 }
86
87 Some(index)
88}
89
/// Returns `true` when `byte` may open an identifier segment: an ASCII letter
/// or `_`. Digits are rejected here even though they may follow later.
fn is_identifier_start(byte: u8) -> bool {
    byte == b'_' || byte.is_ascii_alphabetic()
}

/// Returns `true` when `byte` may continue an identifier segment: an ASCII
/// letter, an ASCII digit or `_`.
fn is_identifier_byte(byte: u8) -> bool {
    byte == b'_' || byte.is_ascii_alphanumeric()
}

98fn left_context_is_module_char(line: &str, start: usize) -> bool {
99 if start == 0 {
100 return false;
101 }
102
103 let mut left = line[..start].char_indices();
104 let Some((left_idx, ch)) = left.next_back() else {
105 return false;
106 };
107
108 if ch != '\'' {
109 return is_module_token_char(ch);
110 }
111
112 if left_idx == 0 {
113 return false;
114 }
115
116 line[..left_idx].chars().next_back().is_some_and(is_module_identifier_char)
117}
118
119fn right_context_is_module_char(line: &str, end: usize) -> bool {
120 if end >= line.len() {
121 return false;
122 }
123
124 let mut right = line[end..].chars();
125 let Some(ch) = right.next() else {
126 return false;
127 };
128
129 if ch != '\'' {
130 return is_module_token_char(ch);
131 }
132
133 right.next().is_some_and(is_module_identifier_char)
134}
135
/// Reports whether `bytes`, viewed from byte offset `start`, begins with the
/// bytes of `needle`.
///
/// Returns `false` when `start` is past the end of `bytes` or when fewer than
/// `needle.len()` bytes remain. The checked slice avoids computing
/// `start + needle.len()`, which could overflow for very large `start`.
fn text_starts_with(bytes: &[u8], start: usize, needle: &str) -> bool {
    bytes
        .get(start..)
        .is_some_and(|tail| tail.starts_with(needle.as_bytes()))
}