1use std::{fmt, fmt::Write, iter::Peekable, str::CharIndices};
2
/// The jump marker carried by a source map element.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Jump {
    /// Written as `i` in the textual form.
    In,
    /// Written as `o` in the textual form.
    Out,
    /// Written as `-` in the textual form.
    Regular,
}

impl Jump {
    /// Returns the one-character textual form of this jump marker.
    pub const fn to_str(self) -> &'static str {
        match self {
            Self::Regular => "-",
            Self::Out => "o",
            Self::In => "i",
        }
    }

    /// Returns the small integer code (`0..=2`) used when the jump is packed
    /// into an integer field.
    const fn to_int(self) -> u32 {
        match self {
            Self::Regular => 2,
            Self::Out => 1,
            Self::In => 0,
        }
    }

    /// Inverse of [`Jump::to_int`].
    ///
    /// # Panics
    ///
    /// Panics if `i` is not one of the codes produced by [`Jump::to_int`].
    fn from_int(i: u32) -> Self {
        match i {
            2 => Self::Regular,
            1 => Self::Out,
            0 => Self::In,
            _ => unreachable!(),
        }
    }
}

impl fmt::Display for Jump {
    /// Writes the same text as [`Jump::to_str`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.to_str();
        f.write_str(text)
    }
}
46
47#[derive(Debug, thiserror::Error)]
49pub struct SyntaxError(Box<SyntaxErrorInner>);
50
51#[derive(Debug)]
52struct SyntaxErrorInner {
53 pos: Option<usize>,
54 msg: String,
55}
56
57impl fmt::Display for SyntaxError {
58 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59 f.write_str("failed to parse source map: ")?;
60 if let Some(pos) = self.0.pos {
61 write!(f, "[{pos}] ")?;
62 }
63 f.write_str(&self.0.msg)
64 }
65}
66
67impl SyntaxError {
68 fn new(pos: impl Into<Option<usize>>, msg: impl Into<String>) -> Self {
69 Self(Box::new(SyntaxErrorInner { pos: pos.into(), msg: msg.into() }))
70 }
71}
72
73impl From<std::num::TryFromIntError> for SyntaxError {
74 fn from(_value: std::num::TryFromIntError) -> Self {
75 Self::new(None, "offset overflow")
76 }
77}
78
/// A lexical token of the textual source map format.
#[derive(PartialEq, Eq)]
enum Token<'a> {
    /// A run of ASCII digits, optionally preceded by `-`, borrowed from the input.
    Number(&'a str),
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `i`
    In,
    /// `o`
    Out,
    /// A `-` that is not followed by a digit.
    Regular,
}

impl fmt::Debug for Token<'_> {
    /// Prints an upper-case token name; numbers also show their source text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Token::Number(digits) => return write!(f, "NUMBER({digits:?})"),
            Token::Semicolon => "SEMICOLON",
            Token::Colon => "COLON",
            Token::In => "JMP(i)",
            Token::Out => "JMP(o)",
            Token::Regular => "JMP(-)",
        };
        f.write_str(name)
    }
}

impl fmt::Display for Token<'_> {
    /// Prints a short description of the token kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match self {
            Token::Regular => "jmp",
            Token::Out => "jmp-out",
            Token::In => "jmp-in",
            Token::Colon => "`:`",
            Token::Semicolon => "`;`",
            Token::Number(_) => "number",
        };
        f.write_str(description)
    }
}
120
121struct Lexer<'input> {
122 input: &'input str,
123 chars: Peekable<CharIndices<'input>>,
124}
125
126impl<'input> Lexer<'input> {
127 fn new(input: &'input str) -> Self {
128 Lexer { chars: input.char_indices().peekable(), input }
129 }
130
131 fn number(&mut self, start: usize, mut end: usize) -> Token<'input> {
132 loop {
133 if let Some((_, ch)) = self.chars.peek().copied() {
134 if !ch.is_ascii_digit() {
135 break;
136 }
137 self.chars.next();
138 end += 1;
139 } else {
140 end = self.input.len();
141 break;
142 }
143 }
144 Token::Number(&self.input[start..end])
145 }
146}
147
148impl<'input> Iterator for Lexer<'input> {
149 type Item = Result<(Token<'input>, usize), SyntaxError>;
150
151 fn next(&mut self) -> Option<Self::Item> {
152 let (start, ch) = self.chars.next()?;
153 let token = match ch {
154 ';' => Token::Semicolon,
155 ':' => Token::Colon,
156 'i' => Token::In,
157 'o' => Token::Out,
158 '-' => match self.chars.peek() {
159 Some((_, ch)) if ch.is_ascii_digit() => {
160 self.chars.next();
161 self.number(start, start + 2)
162 }
163 _ => Token::Regular,
164 },
165 ch if ch.is_ascii_digit() => self.number(start, start + 1),
166 ch => return Some(Err(SyntaxError::new(start, format!("unexpected character: {ch}")))),
167 };
168 Some(Ok((token, start)))
169 }
170}
171
/// A parsed source map: one [`SourceElement`] per entry of the input.
pub type SourceMap = Vec<SourceElement>;

/// A single element of a source map, stored in a compact form.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SourceElement {
    /// First field of the textual form (`offset:length:index:jump:modifier`).
    offset: u32,
    /// Second field of the textual form.
    length: u32,
    /// Third field of the textual form; `-1` means "no index".
    index: i32,
    /// Packed pair: the two highest bits hold the jump code and the low
    /// 30 bits hold the modifier depth.
    jump_and_modifier_depth: u32,
}
189
190impl fmt::Debug for SourceElement {
191 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192 f.debug_struct("SourceElement")
193 .field("offset", &self.offset())
194 .field("length", &self.length())
195 .field("index", &self.index_i32())
196 .field("jump", &self.jump())
197 .field("modifier_depth", &self.modifier_depth())
198 .field("formatted", &format_args!("{self}"))
199 .finish()
200 }
201}
202
203impl Default for SourceElement {
204 fn default() -> Self {
205 Self::new()
206 }
207}
208
209impl SourceElement {
210 pub const fn new() -> Self {
212 Self { offset: 0, length: 0, index: -1, jump_and_modifier_depth: 0 }
213 }
214
215 #[deprecated = "use `new` instead"]
217 pub const fn new_invalid() -> Self {
218 Self::new()
219 }
220
221 #[inline]
223 pub const fn offset(&self) -> u32 {
224 self.offset
225 }
226
227 #[inline]
229 pub const fn length(&self) -> u32 {
230 self.length
231 }
232
233 #[inline]
240 pub const fn index(&self) -> Option<u32> {
241 if self.index == -1 { None } else { Some(self.index as u32) }
242 }
243
244 #[inline]
248 pub const fn index_i32(&self) -> i32 {
249 self.index
250 }
251
252 #[inline]
254 pub fn jump(&self) -> Jump {
255 Jump::from_int(self.jump_and_modifier_depth >> 30)
256 }
257
258 #[inline]
259 const fn set_jump(&mut self, jump: Jump) {
260 self.set_jump_and_modifier_depth(jump, self.modifier_depth());
261 }
262
263 #[inline]
268 pub const fn modifier_depth(&self) -> u32 {
269 (self.jump_and_modifier_depth << 2) >> 2
270 }
271
272 #[inline]
273 fn set_modifier_depth(&mut self, modifier_depth: usize) -> Result<(), SyntaxError> {
274 if modifier_depth > (1 << 30) - 1 {
275 return Err(SyntaxError::new(None, "modifier depth overflow"));
276 }
277 self.set_jump_and_modifier_depth(self.jump(), modifier_depth as u32);
278 Ok(())
279 }
280
281 #[inline]
282 const fn set_jump_and_modifier_depth(&mut self, jump: Jump, modifier_depth: u32) {
283 self.jump_and_modifier_depth = (jump.to_int() << 30) | modifier_depth;
284 }
285}
286
287impl fmt::Display for SourceElement {
288 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
289 write!(
290 f,
291 "{}:{}:{}:{}:{}",
292 self.offset(),
293 self.length(),
294 self.index_i32(),
295 self.jump(),
296 self.modifier_depth(),
297 )
298 }
299}
300
301#[derive(Default)]
302struct SourceElementBuilder {
303 offset: Option<usize>,
304 length: Option<usize>,
305 index: Option<Option<u32>>,
306 jump: Option<Jump>,
307 modifier_depth: Option<usize>,
308}
309
310impl fmt::Display for SourceElementBuilder {
311 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
312 if self.offset.is_none()
313 && self.length.is_none()
314 && self.index.is_none()
315 && self.jump.is_none()
316 && self.modifier_depth.is_none()
317 {
318 return Ok(());
319 }
320
321 if let Some(s) = self.offset {
322 if s == 0 && self.index == Some(None) {
323 f.write_str("-1")?;
324 } else {
325 write!(f, "{s}")?;
326 }
327 }
328 if self.length.is_none()
329 && self.index.is_none()
330 && self.jump.is_none()
331 && self.modifier_depth.is_none()
332 {
333 return Ok(());
334 }
335 f.write_char(':')?;
336
337 if let Some(s) = self.length {
338 if s == 0 && self.index == Some(None) {
339 f.write_str("-1")?;
340 } else {
341 write!(f, "{s}")?;
342 }
343 }
344 if self.index.is_none() && self.jump.is_none() && self.modifier_depth.is_none() {
345 return Ok(());
346 }
347 f.write_char(':')?;
348
349 if let Some(s) = self.index {
350 let s = s.map(|s| s as i64).unwrap_or(-1);
351 write!(f, "{s}")?;
352 }
353 if self.jump.is_none() && self.modifier_depth.is_none() {
354 return Ok(());
355 }
356 f.write_char(':')?;
357
358 if let Some(s) = self.jump {
359 write!(f, "{s}")?;
360 }
361 if self.modifier_depth.is_none() {
362 return Ok(());
363 }
364 f.write_char(':')?;
365
366 if let Some(s) = self.modifier_depth {
367 if self.index == Some(None) {
368 f.write_str("-1")?;
369 } else {
370 s.fmt(f)?;
371 }
372 }
373
374 Ok(())
375 }
376}
377
378impl SourceElementBuilder {
379 fn finish(self, prev: Option<SourceElement>) -> Result<SourceElement, SyntaxError> {
380 let mut element = prev.unwrap_or_default();
381 macro_rules! get_field {
382 (| $field:ident | $e:expr) => {
383 if let Some($field) = self.$field {
384 $e;
385 }
386 };
387 }
388 get_field!(|offset| element.offset = offset.try_into()?);
389 get_field!(|length| element.length = length.try_into()?);
390 get_field!(|index| element.index = index.map(|x| x as i32).unwrap_or(-1));
391 get_field!(|jump| element.set_jump(jump));
392 if let Some(modifier_depth) = self.modifier_depth {
394 element.set_modifier_depth(modifier_depth)?;
395 }
396 Ok(element)
397 }
398
399 fn set_jmp(&mut self, jmp: Jump, pos: usize) -> Result<(), SyntaxError> {
400 if self.jump.is_some() {
401 return Err(SyntaxError::new(pos, "jump already set"));
402 }
403 self.jump = Some(jmp);
404 Ok(())
405 }
406
407 fn set_offset(&mut self, offset: usize, pos: usize) -> Result<(), SyntaxError> {
408 if self.offset.is_some() {
409 return Err(SyntaxError::new(pos, "offset already set"));
410 }
411 self.offset = Some(offset);
412 Ok(())
413 }
414
415 fn set_length(&mut self, length: usize, pos: usize) -> Result<(), SyntaxError> {
416 if self.length.is_some() {
417 return Err(SyntaxError::new(pos, "length already set"));
418 }
419 self.length = Some(length);
420 Ok(())
421 }
422
423 fn set_index(&mut self, index: Option<u32>, pos: usize) -> Result<(), SyntaxError> {
424 if self.index.is_some() {
425 return Err(SyntaxError::new(pos, "index already set"));
426 }
427 self.index = Some(index);
428 Ok(())
429 }
430
431 fn set_modifier(&mut self, modifier_depth: usize, pos: usize) -> Result<(), SyntaxError> {
432 if self.modifier_depth.is_some() {
433 return Err(SyntaxError::new(pos, "modifier depth already set"));
434 }
435 self.modifier_depth = Some(modifier_depth);
436 Ok(())
437 }
438}
439
440pub struct Parser<'input> {
441 lexer: Lexer<'input>,
442 last_element: Option<SourceElement>,
443 done: bool,
444 #[cfg(test)]
445 output: Option<&'input mut dyn Write>,
446}
447
448impl<'input> Parser<'input> {
449 pub fn new(input: &'input str) -> Self {
450 Self {
451 done: input.is_empty(),
452 lexer: Lexer::new(input),
453 last_element: None,
454 #[cfg(test)]
455 output: None,
456 }
457 }
458
459 fn advance(&mut self) -> Result<Option<SourceElement>, SyntaxError> {
460 let mut state = State::Offset;
462 let mut builder = SourceElementBuilder::default();
463
464 let parse_number = |num: &str, pos: usize| {
465 let num = match num.parse::<i64>() {
466 Ok(num) => num,
467 Err(e) => return Err(SyntaxError::new(pos, e.to_string())),
468 };
469 match num {
470 ..-1 => Err(SyntaxError::new(pos, "unexpected negative number")),
471 -1 => Ok(None),
472 0.. => u32::try_from(num)
473 .map(Some)
474 .map_err(|_| SyntaxError::new(pos, "number too large")),
475 }
476 };
477
478 loop {
479 match self.lexer.next() {
480 Some(Ok((token, pos))) => match token {
481 Token::Semicolon => break,
482 Token::Number(num) => match state {
483 State::Offset => {
484 builder
485 .set_offset(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
486 }
487 State::Length => {
488 builder
489 .set_length(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
490 }
491 State::Index => {
492 builder.set_index(parse_number(num, pos)?, pos)?;
493 }
494 State::Modifier => builder
495 .set_modifier(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?,
496 State::Jmp => {
497 return Err(SyntaxError::new(pos, "expected jump, found number"));
498 }
499 },
500 Token::Colon => state.advance(pos)?,
501 Token::In => builder.set_jmp(Jump::In, pos)?,
502 Token::Out => builder.set_jmp(Jump::Out, pos)?,
503 Token::Regular => builder.set_jmp(Jump::Regular, pos)?,
504 },
505 Some(Err(err)) => return Err(err),
506 None => {
507 if self.done {
508 return Ok(None);
509 }
510 self.done = true;
511 break;
512 }
513 }
514 }
515
516 #[cfg(test)]
517 if let Some(out) = self.output.as_mut() {
518 if self.last_element.is_some() {
519 out.write_char(';').unwrap();
520 }
521 write!(out, "{builder}").unwrap();
522 }
523
524 let element = builder.finish(self.last_element.take())?;
525 self.last_element = Some(element.clone());
526 Ok(Some(element))
527 }
528}
529
530impl Iterator for Parser<'_> {
531 type Item = Result<SourceElement, SyntaxError>;
532
533 fn next(&mut self) -> Option<Self::Item> {
534 self.advance().transpose()
535 }
536}
537
538#[derive(Clone, Copy, PartialEq, Eq)]
540enum State {
541 Offset,
543 Length,
545 Index,
547 Jmp,
549 Modifier,
551}
552
553impl State {
554 fn advance(&mut self, pos: usize) -> Result<(), SyntaxError> {
555 *self = match self {
556 Self::Offset => Self::Length,
557 Self::Length => Self::Index,
558 Self::Index => Self::Jmp,
559 Self::Jmp => Self::Modifier,
560 Self::Modifier => return Err(SyntaxError::new(pos, "unexpected colon")),
561 };
562 Ok(())
563 }
564}
565
566pub fn parse(input: &str) -> Result<SourceMap, SyntaxError> {
568 Parser::new(input).collect::<Result<SourceMap, SyntaxError>>().map(|mut v| {
569 v.shrink_to_fit();
570 v
571 })
572}
573
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips `input` through the parser and panics with the error
    /// message on failure.
    fn parse_test(input: &str) {
        if let Err(e) = parse_test_(input) {
            panic!("{e}");
        }
    }

    /// Parses `input` while capturing the parser's test output, and fails if
    /// the captured text differs from `input`.
    fn parse_test_(input: &str) -> Result<SourceMap, SyntaxError> {
        let mut s = String::new();
        let mut parser = Parser::new(input);
        parser.output = Some(&mut s);
        let sm = parser.collect::<Result<SourceMap, _>>()?;
        if s == input {
            return Ok(sm);
        }
        Err(SyntaxError::new(
            None,
            format!("mismatched output:\n actual: {s:?}\n expected: {input:?}\n sm: {sm:#?}"),
        ))
    }

    #[test]
    fn empty() {
        parse_test("");
    }

    #[test]
    fn source_maps() {
        let source_maps = include_str!("../../../../test-data/out-source-maps.txt");

        // Lines are reported 1-based.
        for (line, s) in source_maps.lines().enumerate().map(|(i, s)| (i + 1, s)) {
            if let Err(e) = parse_test_(s) {
                panic!("Failed to parse line {line}: {e}\n{s:?}");
            }
        }
    }

    #[test]
    fn cheatcodes() {
        let s = include_str!("../../../../test-data/cheatcodes.sol-sourcemap.txt");
        parse_test(s);
    }

    #[test]
    fn univ4_deployer() {
        let s = ":::-:0;;1888:10801:91;2615:100;;;2679:3;2615:100;;;;2700:4;2615:100;;;;-1:-1:-1;2615:100:91;;;;2546:169;;;-1:-1:-1;;2546:169:91;;;;;;;;;;;2615:100;2546:169;;;2615:100;2797:101;;;;;;;;;-1:-1:-1;;2797:101:91;;;;;;;;2546:169;2721:177;;;;;;;;;;;;;;;;;;2957:101;1888:10801;2957:101;2797;2957;;;-1:-1:-1;;2957:101:91;;;;356:29:89;2957:101:91;;;;2904:154;;;-1:-1:-1;;2904:154:91;;;;;;;;;;;;-1:-1:-1;;;;;;2904:154:91;;;;;;;;4018:32;;;;;4048:2;4018:32;;;4056:74;;;-1:-1:-1;;;;;4056:74:91;;;;;;;;1888:10801;;;;;;;;;;;;;;;;";
        parse_test(s);
    }
}