1use perl_ast_v2::{Node, NodeKind};
7use perl_lexer::TokenType;
8use perl_position_tracking::Range;
9
/// Non-semantic source text found between tokens.
///
/// `Eq` is derived alongside `PartialEq`: every payload is a `String`, so
/// equality is total and the type can be used wherever `Eq` is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Trivia {
    /// A run of whitespace, stored verbatim.
    Whitespace(String),
    /// A `#` line comment, stored verbatim.
    LineComment(String),
    /// A POD block, stored verbatim.
    PodComment(String),
    /// A line break. The original terminator is not stored.
    Newline,
}

impl Trivia {
    /// Returns the text of this trivia item.
    ///
    /// The three text-carrying variants return their stored string.
    /// `Newline` always yields `"\n"`, whatever terminator it was created from.
    pub fn as_str(&self) -> &str {
        match self {
            Trivia::Whitespace(text) | Trivia::LineComment(text) | Trivia::PodComment(text) => {
                text
            }
            Trivia::Newline => "\n",
        }
    }

    /// Returns a short, fixed label identifying the variant.
    pub fn kind_name(&self) -> &'static str {
        match self {
            Trivia::Whitespace(_) => "whitespace",
            Trivia::LineComment(_) => "comment",
            Trivia::PodComment(_) => "pod",
            Trivia::Newline => "newline",
        }
    }
}
44
45#[derive(Debug, Clone)]
47pub struct NodeWithTrivia {
48 pub node: Node,
50 pub leading_trivia: Vec<TriviaToken>,
52 pub trailing_trivia: Vec<TriviaToken>,
54}
55
56#[derive(Debug, Clone)]
58pub struct TriviaToken {
59 pub trivia: Trivia,
61 pub range: Range,
63}
64
65impl TriviaToken {
66 pub fn new(trivia: Trivia, range: Range) -> Self {
68 TriviaToken { trivia, range }
69 }
70}
71
72pub trait TriviaCollector {
76 fn collect_leading_trivia(&mut self) -> Vec<TriviaToken>;
78
79 fn collect_trailing_trivia(&mut self) -> Vec<TriviaToken>;
81}
82
83pub struct TriviaLexer {
87 lexer: perl_lexer::PerlLexer<'static>,
89 source: String,
91 position: usize,
93 _trivia_buffer: Vec<TriviaToken>,
95}
96
97impl TriviaLexer {
98 pub fn new(source: String) -> Self {
100 let source_ref: &'static str = Box::leak(source.clone().into_boxed_str());
103
104 TriviaLexer {
105 lexer: perl_lexer::PerlLexer::new(source_ref),
106 source,
107 position: 0,
108 _trivia_buffer: Vec::new(),
109 }
110 }
111
112 pub fn next_token_with_trivia(&mut self) -> Option<(perl_lexer::Token, Vec<TriviaToken>)> {
116 let trivia = self.collect_trivia();
118
119 let token = self.lexer.next_token()?;
121
122 self.position = self.position.max(token.end);
124
125 if matches!(token.token_type, TokenType::EOF) {
127 if !trivia.is_empty() {
128 return Some((token, trivia));
130 }
131 return None;
132 }
133
134 Some((token, trivia))
135 }
136
137 fn collect_trivia(&mut self) -> Vec<TriviaToken> {
139 let mut trivia = Vec::new();
140
141 while self.position < self.source.len() {
142 let remaining = &self.source[self.position..];
143
144 if let Some(ws_len) = self.whitespace_length(remaining) {
146 let ws = &remaining[..ws_len];
147 let start = self.position;
148 let end = start + ws_len;
149
150 if ws.chars().all(|c| c == '\n' || c == '\r') {
152 trivia.push(TriviaToken::new(
153 Trivia::Newline,
154 Range::new(
155 perl_position_tracking::Position::new(start, 0, 0),
156 perl_position_tracking::Position::new(end, 0, 0),
157 ),
158 ));
159 } else {
160 trivia.push(TriviaToken::new(
161 Trivia::Whitespace(ws.to_string()),
162 Range::new(
163 perl_position_tracking::Position::new(start, 0, 0),
164 perl_position_tracking::Position::new(end, 0, 0),
165 ),
166 ));
167 }
168
169 self.position += ws_len;
170 continue;
171 }
172
173 if remaining.starts_with('#') {
175 let comment_end = remaining.find('\n').unwrap_or(remaining.len());
176 let comment = &remaining[..comment_end];
177 let start = self.position;
178 let end = start + comment_end;
179
180 trivia.push(TriviaToken::new(
181 Trivia::LineComment(comment.to_string()),
182 Range::new(
183 perl_position_tracking::Position::new(start, 0, 0),
184 perl_position_tracking::Position::new(end, 0, 0),
185 ),
186 ));
187
188 self.position += comment_end;
189 continue;
190 }
191
192 if remaining.starts_with("=")
194 && (self.position == 0 || self.source.as_bytes()[self.position - 1] == b'\n')
195 {
196 if let Some(pod_end) = self.find_pod_end(remaining) {
197 let pod = &remaining[..pod_end];
198 let start = self.position;
199 let end = start + pod_end;
200
201 trivia.push(TriviaToken::new(
202 Trivia::PodComment(pod.to_string()),
203 Range::new(
204 perl_position_tracking::Position::new(start, 0, 0),
205 perl_position_tracking::Position::new(end, 0, 0),
206 ),
207 ));
208
209 self.position += pod_end;
210 continue;
211 }
212 }
213
214 break;
216 }
217
218 if self.position > 0 {
220 }
223
224 trivia
225 }
226
227 fn whitespace_length(&self, s: &str) -> Option<usize> {
229 let mut len = 0;
230 for ch in s.chars() {
231 if ch.is_whitespace() && ch != '\n' && ch != '\r' {
232 len += ch.len_utf8();
233 } else if ch == '\n' || ch == '\r' {
234 len += ch.len_utf8();
236 if ch == '\r' && s[len..].starts_with('\n') {
238 len += 1;
239 }
240 break;
241 } else {
242 break;
243 }
244 }
245
246 if len > 0 { Some(len) } else { None }
247 }
248
249 fn find_pod_end(&self, s: &str) -> Option<usize> {
251 let mut pos = 0;
253 for line in s.lines() {
254 if line.trim() == "=cut" {
255 return Some(pos + line.len());
256 }
257 pos += line.len() + 1; }
259
260 Some(s.len())
262 }
263}
264
265pub struct TriviaPreservingParser {
269 lexer: TriviaLexer,
271 current: Option<(perl_lexer::Token, Vec<TriviaToken>)>,
273 id_generator: perl_ast_v2::NodeIdGenerator,
275}
276
277impl TriviaPreservingParser {
278 pub fn new(source: String) -> Self {
280 let mut parser = TriviaPreservingParser {
281 lexer: TriviaLexer::new(source),
282 current: None,
283 id_generator: perl_ast_v2::NodeIdGenerator::new(),
284 };
285 parser.advance();
287 parser
288 }
289
290 fn advance(&mut self) {
292 self.current = self.lexer.next_token_with_trivia();
293 }
294
295 pub fn parse(mut self) -> NodeWithTrivia {
299 let leading_trivia =
300 if let Some((_, trivia)) = &self.current { trivia.clone() } else { Vec::new() };
301
302 let node = Node::new(
304 self.id_generator.next_id(),
305 NodeKind::Program { statements: Vec::new() },
306 Range::new(
307 perl_position_tracking::Position::new(0, 1, 1),
308 perl_position_tracking::Position::new(0, 1, 1),
309 ),
310 );
311
312 NodeWithTrivia { node, leading_trivia, trailing_trivia: Vec::new() }
313 }
314}
315
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must_some;

    /// Whitespace and a line comment in front of the first token are both reported.
    #[test]
    fn test_trivia_collection() {
        let mut lexer = TriviaLexer::new(String::from(" # comment\n my $x = 42;"));

        let (_token, trivia) = must_some(lexer.next_token_with_trivia());

        // Dump what was collected to make a failure easier to diagnose.
        eprintln!("Trivia count: {}", trivia.len());
        for (i, t) in trivia.iter().enumerate() {
            eprintln!("Trivia[{}]: {:?}", i, t.trivia);
        }

        let has_whitespace = trivia.iter().any(|t| matches!(t.trivia, Trivia::Whitespace(_)));
        let has_comment = trivia.iter().any(|t| matches!(t.trivia, Trivia::LineComment(_)));

        assert!(trivia.len() >= 2);
        assert!(has_whitespace);
        assert!(has_comment);
    }

    /// A POD block in front of the first token is reported as `PodComment`.
    #[test]
    fn test_pod_preservation() {
        let mut lexer = TriviaLexer::new(String::from("=head1 NAME\n\nTest\n\n=cut\n\nmy $x;"));

        let (_, trivia) = must_some(lexer.next_token_with_trivia());

        let has_pod = trivia.iter().any(|t| matches!(t.trivia, Trivia::PodComment(_)));
        assert!(has_pod);
    }
}