1use std::{fmt, fmt::Write, iter::Peekable, str::CharIndices};
2
/// The jump kind attached to a source map element.
///
/// Each variant has a one-character textual form (see `to_str`) and a small integer
/// encoding (see `to_int` / `from_int`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Jump {
    /// Written as `i` in the textual form; encoded as `0`.
    In,
    /// Written as `o` in the textual form; encoded as `1`.
    Out,
    /// Written as `-` in the textual form; encoded as `2`.
    Regular,
}
12
13impl Jump {
14 pub fn to_str(self) -> &'static str {
16 match self {
17 Self::In => "i",
18 Self::Out => "o",
19 Self::Regular => "-",
20 }
21 }
22
23 fn to_int(self) -> u32 {
24 match self {
25 Self::In => 0,
26 Self::Out => 1,
27 Self::Regular => 2,
28 }
29 }
30
31 fn from_int(i: u32) -> Self {
32 match i {
33 0 => Self::In,
34 1 => Self::Out,
35 2 => Self::Regular,
36 _ => unreachable!(),
37 }
38 }
39}
40
41impl fmt::Display for Jump {
42 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43 f.write_str(self.to_str())
44 }
45}
46
/// Error produced while lexing or parsing a source map.
///
/// The payload (optional position and message) is boxed, so the error value itself is a
/// single pointer.
#[derive(Debug, thiserror::Error)]
pub struct SyntaxError(Box<SyntaxErrorInner>);
50
/// Heap-allocated payload of a [`SyntaxError`].
#[derive(Debug)]
struct SyntaxErrorInner {
    /// Position reported with the error, if any; the lexer and parser pass the start index
    /// of the offending token here.
    pos: Option<usize>,
    /// Human-readable description of what went wrong.
    msg: String,
}
56
57impl fmt::Display for SyntaxError {
58 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59 f.write_str("failed to parse source map: ")?;
60 if let Some(pos) = self.0.pos {
61 write!(f, "[{pos}] ")?;
62 }
63 f.write_str(&self.0.msg)
64 }
65}
66
67impl SyntaxError {
68 fn new(pos: impl Into<Option<usize>>, msg: impl Into<String>) -> Self {
69 Self(Box::new(SyntaxErrorInner { pos: pos.into(), msg: msg.into() }))
70 }
71}
72
73impl From<std::num::TryFromIntError> for SyntaxError {
74 fn from(_value: std::num::TryFromIntError) -> Self {
75 Self::new(None, "offset overflow")
76 }
77}
78
/// A lexical token produced by the `Lexer`.
#[derive(PartialEq, Eq)]
enum Token<'a> {
    /// A run of ASCII digits, optionally preceded by `-`, borrowed from the input.
    Number(&'a str),
    /// The `;` character.
    Semicolon,
    /// The `:` character.
    Colon,
    /// The `i` character.
    In,
    /// The `o` character.
    Out,
    /// A `-` that is not immediately followed by a digit.
    Regular,
}
94
95impl fmt::Debug for Token<'_> {
96 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
97 match self {
98 Token::Number(s) => write!(f, "NUMBER({s:?})"),
99 Token::Semicolon => write!(f, "SEMICOLON"),
100 Token::Colon => write!(f, "COLON"),
101 Token::In => write!(f, "JMP(i)"),
102 Token::Out => write!(f, "JMP(o)"),
103 Token::Regular => write!(f, "JMP(-)"),
104 }
105 }
106}
107
108impl fmt::Display for Token<'_> {
109 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110 match self {
111 Token::Number(_) => write!(f, "number"),
112 Token::Semicolon => write!(f, "`;`"),
113 Token::Colon => write!(f, "`:`"),
114 Token::In => write!(f, "jmp-in"),
115 Token::Out => write!(f, "jmp-out"),
116 Token::Regular => write!(f, "jmp"),
117 }
118 }
119}
120
/// Splits a source map string into `Token`s.
struct Lexer<'input> {
    /// The full input; number tokens are returned as sub-slices of it.
    input: &'input str,
    /// Peekable cursor over the `(byte index, char)` pairs of `input`.
    chars: Peekable<CharIndices<'input>>,
}
125
126impl<'input> Lexer<'input> {
127 fn new(input: &'input str) -> Self {
128 Lexer { chars: input.char_indices().peekable(), input }
129 }
130
131 fn number(&mut self, start: usize, mut end: usize) -> Token<'input> {
132 loop {
133 if let Some((_, ch)) = self.chars.peek().cloned() {
134 if !ch.is_ascii_digit() {
135 break;
136 }
137 self.chars.next();
138 end += 1;
139 } else {
140 end = self.input.len();
141 break;
142 }
143 }
144 Token::Number(&self.input[start..end])
145 }
146}
147
148impl<'input> Iterator for Lexer<'input> {
149 type Item = Result<(Token<'input>, usize), SyntaxError>;
150
151 fn next(&mut self) -> Option<Self::Item> {
152 let (start, ch) = self.chars.next()?;
153 let token = match ch {
154 ';' => Token::Semicolon,
155 ':' => Token::Colon,
156 'i' => Token::In,
157 'o' => Token::Out,
158 '-' => match self.chars.peek() {
159 Some((_, ch)) if ch.is_ascii_digit() => {
160 self.chars.next();
161 self.number(start, start + 2)
162 }
163 _ => Token::Regular,
164 },
165 ch if ch.is_ascii_digit() => self.number(start, start + 1),
166 ch => return Some(Err(SyntaxError::new(start, format!("unexpected character: {ch}")))),
167 };
168 Some(Ok((token, start)))
169 }
170}
171
/// A parsed source map: a list of [`SourceElement`]s.
pub type SourceMap = Vec<SourceElement>;
177
/// A single entry of a source map.
///
/// Its textual form is `offset:length:index:jump:modifier_depth` (see the `Display` impl).
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SourceElement {
    /// Start offset of the range. NOTE(review): presumably a byte offset into the source
    /// file — confirm against the producer of the map.
    offset: u32,
    /// Length of the range.
    length: u32,
    /// Source index, with `-1` meaning "no index" (see `index`).
    index: i32,
    /// Packed field: the upper 2 bits hold the [`Jump`] encoding and the lower 30 bits hold
    /// the modifier depth.
    jump_and_modifier_depth: u32,
}
189
190impl fmt::Debug for SourceElement {
191 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192 f.debug_struct("SourceElement")
193 .field("offset", &self.offset())
194 .field("length", &self.length())
195 .field("index", &self.index_i32())
196 .field("jump", &self.jump())
197 .field("modifier_depth", &self.modifier_depth())
198 .field("formatted", &format_args!("{self}"))
199 .finish()
200 }
201}
202
203impl Default for SourceElement {
204 fn default() -> Self {
205 Self::new()
206 }
207}
208
209impl SourceElement {
210 pub fn new() -> Self {
212 Self { offset: 0, length: 0, index: -1, jump_and_modifier_depth: 0 }
213 }
214
215 #[deprecated = "use `new` instead"]
217 pub fn new_invalid() -> Self {
218 Self::new()
219 }
220
221 #[inline]
223 pub fn offset(&self) -> u32 {
224 self.offset
225 }
226
227 #[inline]
229 pub fn length(&self) -> u32 {
230 self.length
231 }
232
233 #[inline]
240 pub fn index(&self) -> Option<u32> {
241 if self.index == -1 {
242 None
243 } else {
244 Some(self.index as u32)
245 }
246 }
247
248 #[inline]
252 pub fn index_i32(&self) -> i32 {
253 self.index
254 }
255
256 #[inline]
258 pub fn jump(&self) -> Jump {
259 Jump::from_int(self.jump_and_modifier_depth >> 30)
260 }
261
262 #[inline]
263 fn set_jump(&mut self, jump: Jump) {
264 self.set_jump_and_modifier_depth(jump, self.modifier_depth());
265 }
266
267 #[inline]
272 pub fn modifier_depth(&self) -> u32 {
273 (self.jump_and_modifier_depth << 2) >> 2
274 }
275
276 #[inline]
277 fn set_modifier_depth(&mut self, modifier_depth: usize) -> Result<(), SyntaxError> {
278 if modifier_depth > (1 << 30) - 1 {
279 return Err(SyntaxError::new(None, "modifier depth overflow"));
280 }
281 self.set_jump_and_modifier_depth(self.jump(), modifier_depth as u32);
282 Ok(())
283 }
284
285 #[inline]
286 fn set_jump_and_modifier_depth(&mut self, jump: Jump, modifier_depth: u32) {
287 self.jump_and_modifier_depth = (jump.to_int() << 30) | modifier_depth;
288 }
289}
290
291impl fmt::Display for SourceElement {
292 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
293 write!(
294 f,
295 "{}:{}:{}:{}:{}",
296 self.offset(),
297 self.length(),
298 self.index_i32(),
299 self.jump(),
300 self.modifier_depth(),
301 )
302 }
303}
304
/// Collects the fields that were explicitly present for one element of the input.
///
/// Every field is `None` until the parser sets it; `finish` then overlays the fields that
/// are set on top of the previous element.
#[derive(Default)]
struct SourceElementBuilder {
    /// Offset, if given.
    offset: Option<usize>,
    /// Length, if given.
    length: Option<usize>,
    /// Index, if given; the inner `None` records an explicit `-1`.
    index: Option<Option<u32>>,
    /// Jump, if given.
    jump: Option<Jump>,
    /// Modifier depth, if given.
    modifier_depth: Option<usize>,
}
313
314impl fmt::Display for SourceElementBuilder {
315 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316 if self.offset.is_none()
317 && self.length.is_none()
318 && self.index.is_none()
319 && self.jump.is_none()
320 && self.modifier_depth.is_none()
321 {
322 return Ok(());
323 }
324
325 if let Some(s) = self.offset {
326 if s == 0 && self.index == Some(None) {
327 f.write_str("-1")?;
328 } else {
329 write!(f, "{s}")?;
330 }
331 }
332 if self.length.is_none()
333 && self.index.is_none()
334 && self.jump.is_none()
335 && self.modifier_depth.is_none()
336 {
337 return Ok(());
338 }
339 f.write_char(':')?;
340
341 if let Some(s) = self.length {
342 if s == 0 && self.index == Some(None) {
343 f.write_str("-1")?;
344 } else {
345 write!(f, "{s}")?;
346 }
347 }
348 if self.index.is_none() && self.jump.is_none() && self.modifier_depth.is_none() {
349 return Ok(());
350 }
351 f.write_char(':')?;
352
353 if let Some(s) = self.index {
354 let s = s.map(|s| s as i64).unwrap_or(-1);
355 write!(f, "{s}")?;
356 }
357 if self.jump.is_none() && self.modifier_depth.is_none() {
358 return Ok(());
359 }
360 f.write_char(':')?;
361
362 if let Some(s) = self.jump {
363 write!(f, "{s}")?;
364 }
365 if self.modifier_depth.is_none() {
366 return Ok(());
367 }
368 f.write_char(':')?;
369
370 if let Some(s) = self.modifier_depth {
371 if self.index == Some(None) {
372 f.write_str("-1")?;
373 } else {
374 s.fmt(f)?;
375 }
376 }
377
378 Ok(())
379 }
380}
381
382impl SourceElementBuilder {
383 fn finish(self, prev: Option<SourceElement>) -> Result<SourceElement, SyntaxError> {
384 let mut element = prev.unwrap_or_default();
385 macro_rules! get_field {
386 (| $field:ident | $e:expr) => {
387 if let Some($field) = self.$field {
388 $e;
389 }
390 };
391 }
392 get_field!(|offset| element.offset = offset.try_into()?);
393 get_field!(|length| element.length = length.try_into()?);
394 get_field!(|index| element.index = index.map(|x| x as i32).unwrap_or(-1));
395 get_field!(|jump| element.set_jump(jump));
396 if let Some(modifier_depth) = self.modifier_depth {
398 element.set_modifier_depth(modifier_depth)?;
399 }
400 Ok(element)
401 }
402
403 fn set_jmp(&mut self, jmp: Jump, pos: usize) -> Result<(), SyntaxError> {
404 if self.jump.is_some() {
405 return Err(SyntaxError::new(pos, "jump already set"));
406 }
407 self.jump = Some(jmp);
408 Ok(())
409 }
410
411 fn set_offset(&mut self, offset: usize, pos: usize) -> Result<(), SyntaxError> {
412 if self.offset.is_some() {
413 return Err(SyntaxError::new(pos, "offset already set"));
414 }
415 self.offset = Some(offset);
416 Ok(())
417 }
418
419 fn set_length(&mut self, length: usize, pos: usize) -> Result<(), SyntaxError> {
420 if self.length.is_some() {
421 return Err(SyntaxError::new(pos, "length already set"));
422 }
423 self.length = Some(length);
424 Ok(())
425 }
426
427 fn set_index(&mut self, index: Option<u32>, pos: usize) -> Result<(), SyntaxError> {
428 if self.index.is_some() {
429 return Err(SyntaxError::new(pos, "index already set"));
430 }
431 self.index = Some(index);
432 Ok(())
433 }
434
435 fn set_modifier(&mut self, modifier_depth: usize, pos: usize) -> Result<(), SyntaxError> {
436 if self.modifier_depth.is_some() {
437 return Err(SyntaxError::new(pos, "modifier depth already set"));
438 }
439 self.modifier_depth = Some(modifier_depth);
440 Ok(())
441 }
442}
443
/// Iterator that parses a source map string one [`SourceElement`] at a time.
pub struct Parser<'input> {
    /// Token source for the input string.
    lexer: Lexer<'input>,
    /// The most recently produced element; fields omitted by the next element are taken
    /// from it.
    last_element: Option<SourceElement>,
    /// Set once the end of the input has been handled (or immediately for empty input);
    /// after that the parser yields nothing more.
    done: bool,
    /// Test-only sink that receives the re-serialized form of every parsed element.
    #[cfg(test)]
    output: Option<&'input mut dyn Write>,
}
451
452impl<'input> Parser<'input> {
453 pub fn new(input: &'input str) -> Self {
454 Self {
455 done: input.is_empty(),
456 lexer: Lexer::new(input),
457 last_element: None,
458 #[cfg(test)]
459 output: None,
460 }
461 }
462
463 fn advance(&mut self) -> Result<Option<SourceElement>, SyntaxError> {
464 let mut state = State::Offset;
466 let mut builder = SourceElementBuilder::default();
467
468 let parse_number = |num: &str, pos: usize| {
469 let num = match num.parse::<i64>() {
470 Ok(num) => num,
471 Err(e) => return Err(SyntaxError::new(pos, e.to_string())),
472 };
473 match num {
474 ..-1 => Err(SyntaxError::new(pos, "unexpected negative number")),
475 -1 => Ok(None),
476 0.. => u32::try_from(num)
477 .map(Some)
478 .map_err(|_| SyntaxError::new(pos, "number too large")),
479 }
480 };
481
482 loop {
483 match self.lexer.next() {
484 Some(Ok((token, pos))) => match token {
485 Token::Semicolon => break,
486 Token::Number(num) => match state {
487 State::Offset => {
488 builder
489 .set_offset(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
490 }
491 State::Length => {
492 builder
493 .set_length(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
494 }
495 State::Index => {
496 builder.set_index(parse_number(num, pos)?, pos)?;
497 }
498 State::Modifier => builder
499 .set_modifier(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?,
500 State::Jmp => {
501 return Err(SyntaxError::new(pos, "expected jump, found number"));
502 }
503 },
504 Token::Colon => state.advance(pos)?,
505 Token::In => builder.set_jmp(Jump::In, pos)?,
506 Token::Out => builder.set_jmp(Jump::Out, pos)?,
507 Token::Regular => builder.set_jmp(Jump::Regular, pos)?,
508 },
509 Some(Err(err)) => return Err(err),
510 None => {
511 if self.done {
512 return Ok(None);
513 }
514 self.done = true;
515 break;
516 }
517 }
518 }
519
520 #[cfg(test)]
521 if let Some(out) = self.output.as_mut() {
522 if self.last_element.is_some() {
523 out.write_char(';').unwrap();
524 }
525 write!(out, "{builder}").unwrap();
526 }
527
528 let element = builder.finish(self.last_element.take())?;
529 self.last_element = Some(element.clone());
530 Ok(Some(element))
531 }
532}
533
534impl Iterator for Parser<'_> {
535 type Item = Result<SourceElement, SyntaxError>;
536
537 fn next(&mut self) -> Option<Self::Item> {
538 self.advance().transpose()
539 }
540}
541
/// Which field of the current element the parser is positioned at; each `:` moves to the
/// next one.
#[derive(Clone, Copy, PartialEq, Eq)]
enum State {
    /// A number here is the offset.
    Offset,
    /// A number here is the length.
    Length,
    /// A number here is the index.
    Index,
    /// The jump position; a number here is rejected.
    Jmp,
    /// A number here is the modifier depth.
    Modifier,
}
556
557impl State {
558 fn advance(&mut self, pos: usize) -> Result<(), SyntaxError> {
559 *self = match self {
560 Self::Offset => Self::Length,
561 Self::Length => Self::Index,
562 Self::Index => Self::Jmp,
563 Self::Jmp => Self::Modifier,
564 Self::Modifier => return Err(SyntaxError::new(pos, "unexpected colon")),
565 };
566 Ok(())
567 }
568}
569
570pub fn parse(input: &str) -> Result<SourceMap, SyntaxError> {
572 Parser::new(input).collect::<Result<SourceMap, SyntaxError>>().map(|mut v| {
573 v.shrink_to_fit();
574 v
575 })
576}
577
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the round-trip check on `input` and panics with the error message on failure.
    fn parse_test(input: &str) {
        match parse_test_(input) {
            Ok(_) => {}
            Err(e) => panic!("{e}"),
        }
    }

    /// Parses `input` while capturing the parser's re-serialized output, and fails if that
    /// output is not identical to `input`.
    fn parse_test_(input: &str) -> Result<SourceMap, SyntaxError> {
        let mut s = String::new();
        let mut p = Parser::new(input);
        // Route the test-only re-serialization of every element into `s`.
        p.output = Some(&mut s);
        let sm = p.collect::<Result<SourceMap, _>>()?;
        if s != input {
            return Err(SyntaxError::new(
                None,
                format!("mismatched output:\n actual: {s:?}\n expected: {input:?}\n sm: {sm:#?}"),
            ));
        }
        Ok(sm)
    }

    /// An empty input parses successfully and round-trips to an empty string.
    #[test]
    fn empty() {
        parse_test("");
    }

    /// Every line of the fixture file is treated as an independent source map.
    #[test]
    fn source_maps() {
        let source_maps = include_str!("../../../../test-data/out-source-maps.txt");

        for (line, s) in source_maps.lines().enumerate() {
            // Report 1-based line numbers in failure messages.
            let line = line + 1;
            parse_test_(s).unwrap_or_else(|e| panic!("Failed to parse line {line}: {e}\n{s:?}"));
        }
    }

    /// The whole fixture file is parsed as a single source map.
    #[test]
    fn cheatcodes() {
        let s = include_str!("../../../../test-data/cheatcodes.sol-sourcemap.txt");
        parse_test(s);
    }

    /// Inline source map that starts with empty fields and contains many `-1` entries.
    #[test]
    fn univ4_deployer() {
        let s = ":::-:0;;1888:10801:91;2615:100;;;2679:3;2615:100;;;;2700:4;2615:100;;;;-1:-1:-1;2615:100:91;;;;2546:169;;;-1:-1:-1;;2546:169:91;;;;;;;;;;;2615:100;2546:169;;;2615:100;2797:101;;;;;;;;;-1:-1:-1;;2797:101:91;;;;;;;;2546:169;2721:177;;;;;;;;;;;;;;;;;;2957:101;1888:10801;2957:101;2797;2957;;;-1:-1:-1;;2957:101:91;;;;356:29:89;2957:101:91;;;;2904:154;;;-1:-1:-1;;2904:154:91;;;;;;;;;;;;-1:-1:-1;;;;;;2904:154:91;;;;;;;;4018:32;;;;;4048:2;4018:32;;;4056:74;;;-1:-1:-1;;;;;4056:74:91;;;;;;;;1888:10801;;;;;;;;;;;;;;;;";
        parse_test(s);
    }
}