foundry_compilers_artifacts_solc/
sourcemap.rs

1use std::{fmt, fmt::Write, iter::Peekable, str::CharIndices};
2
/// The kind of jump attached to an instruction in a source map.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Jump {
    /// A jump instruction that goes into a function
    In,
    /// A jump instruction that returns from a function
    Out,
    /// A regular jump instruction
    Regular,
}

impl Jump {
    /// Returns the string representation of the jump instruction.
    pub fn to_str(self) -> &'static str {
        match self {
            Jump::In => "i",
            Jump::Out => "o",
            Jump::Regular => "-",
        }
    }

    /// Returns the numeric code of the jump: `In` is 0, `Out` is 1, `Regular` is 2.
    fn to_int(self) -> u32 {
        // The variants carry no explicit discriminants, so they are numbered
        // 0, 1, 2 in declaration order.
        self as u32
    }

    /// Inverse of [`Jump::to_int`].
    ///
    /// # Panics
    ///
    /// Panics when `i` is not 0, 1 or 2.
    fn from_int(i: u32) -> Self {
        const VARIANTS: [Jump; 3] = [Jump::In, Jump::Out, Jump::Regular];
        match VARIANTS.get(i as usize) {
            Some(&jump) => jump,
            None => unreachable!(),
        }
    }
}

impl fmt::Display for Jump {
    /// Writes the same text as [`Jump::to_str`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let symbol = self.to_str();
        f.write_str(symbol)
    }
}
46
/// An error that can happen during source map parsing.
///
/// The details are kept behind a `Box`; see the `Display` impl for the rendered message.
#[derive(Debug, thiserror::Error)]
pub struct SyntaxError(Box<SyntaxErrorInner>);

/// The payload of a [`SyntaxError`].
#[derive(Debug)]
struct SyntaxErrorInner {
    /// Position associated with the error, when one is known.
    pos: Option<usize>,
    /// Description of what went wrong.
    msg: String,
}
56
57impl fmt::Display for SyntaxError {
58    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59        f.write_str("failed to parse source map: ")?;
60        if let Some(pos) = self.0.pos {
61            write!(f, "[{pos}] ")?;
62        }
63        f.write_str(&self.0.msg)
64    }
65}
66
67impl SyntaxError {
68    fn new(pos: impl Into<Option<usize>>, msg: impl Into<String>) -> Self {
69        Self(Box::new(SyntaxErrorInner { pos: pos.into(), msg: msg.into() }))
70    }
71}
72
73impl From<std::num::TryFromIntError> for SyntaxError {
74    fn from(_value: std::num::TryFromIntError) -> Self {
75        Self::new(None, "offset overflow")
76    }
77}
78
/// A lexical token of the source map syntax.
#[derive(PartialEq, Eq)]
enum Token<'a> {
    /// Decimal number
    Number(&'a str),
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `i` which represents an instruction that goes into a function
    In,
    /// `o` which represents an instruction that returns from a function
    Out,
    /// `-` regular jump
    Regular,
}

impl fmt::Debug for Token<'_> {
    /// Writes an upper-case tag for the token; numbers include their quoted text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let tag = match self {
            // The only variant with a payload is written directly.
            Token::Number(s) => return write!(f, "NUMBER({s:?})"),
            Token::Semicolon => "SEMICOLON",
            Token::Colon => "COLON",
            Token::In => "JMP(i)",
            Token::Out => "JMP(o)",
            Token::Regular => "JMP(-)",
        };
        f.write_str(tag)
    }
}

impl fmt::Display for Token<'_> {
    /// Writes a short human-readable name for the token kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Token::Number(_) => "number",
            Token::Semicolon => "`;`",
            Token::Colon => "`:`",
            Token::In => "jmp-in",
            Token::Out => "jmp-out",
            Token::Regular => "jmp",
        };
        f.write_str(name)
    }
}
120
121struct Lexer<'input> {
122    input: &'input str,
123    chars: Peekable<CharIndices<'input>>,
124}
125
126impl<'input> Lexer<'input> {
127    fn new(input: &'input str) -> Self {
128        Lexer { chars: input.char_indices().peekable(), input }
129    }
130
131    fn number(&mut self, start: usize, mut end: usize) -> Token<'input> {
132        loop {
133            if let Some((_, ch)) = self.chars.peek().cloned() {
134                if !ch.is_ascii_digit() {
135                    break;
136                }
137                self.chars.next();
138                end += 1;
139            } else {
140                end = self.input.len();
141                break;
142            }
143        }
144        Token::Number(&self.input[start..end])
145    }
146}
147
148impl<'input> Iterator for Lexer<'input> {
149    type Item = Result<(Token<'input>, usize), SyntaxError>;
150
151    fn next(&mut self) -> Option<Self::Item> {
152        let (start, ch) = self.chars.next()?;
153        let token = match ch {
154            ';' => Token::Semicolon,
155            ':' => Token::Colon,
156            'i' => Token::In,
157            'o' => Token::Out,
158            '-' => match self.chars.peek() {
159                Some((_, ch)) if ch.is_ascii_digit() => {
160                    self.chars.next();
161                    self.number(start, start + 2)
162                }
163                _ => Token::Regular,
164            },
165            ch if ch.is_ascii_digit() => self.number(start, start + 1),
166            ch => return Some(Err(SyntaxError::new(start, format!("unexpected character: {ch}")))),
167        };
168        Some(Ok((token, start)))
169    }
170}
171
/// A Solidity source map, which is composed of multiple [`SourceElement`]s, separated by
/// semicolons.
///
/// This is a plain `Vec`, so all the usual vector and slice operations apply.
///
/// Solidity reference: <https://docs.soliditylang.org/en/latest/internals/source_mappings.html#source-mappings>
pub type SourceMap = Vec<SourceElement>;
177
/// A single element in a [`SourceMap`].
///
/// Solidity reference: <https://docs.soliditylang.org/en/latest/internals/source_mappings.html#source-mappings>
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SourceElement {
    // Byte offset of the start of the range in the source file.
    offset: u32,
    // Length of the source range in bytes.
    length: u32,
    // Source index; `-1` stands for "no source file".
    index: i32,
    // 2 bits for jump, 30 bits for modifier depth; see [set_jump_and_modifier_depth]
    // The jump sits in the two most significant bits, the modifier depth in the lower 30.
    jump_and_modifier_depth: u32,
}
189
190impl fmt::Debug for SourceElement {
191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192        f.debug_struct("SourceElement")
193            .field("offset", &self.offset())
194            .field("length", &self.length())
195            .field("index", &self.index_i32())
196            .field("jump", &self.jump())
197            .field("modifier_depth", &self.modifier_depth())
198            .field("formatted", &format_args!("{self}"))
199            .finish()
200    }
201}
202
203impl Default for SourceElement {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
209impl SourceElement {
210    /// Creates a new source element with default values.
211    pub fn new() -> Self {
212        Self { offset: 0, length: 0, index: -1, jump_and_modifier_depth: 0 }
213    }
214
215    /// Creates a new source element with default values.
216    #[deprecated = "use `new` instead"]
217    pub fn new_invalid() -> Self {
218        Self::new()
219    }
220
221    /// The byte-offset to the start of the range in the source file.
222    #[inline]
223    pub fn offset(&self) -> u32 {
224        self.offset
225    }
226
227    /// The length of the source range in bytes.
228    #[inline]
229    pub fn length(&self) -> u32 {
230        self.length
231    }
232
233    /// The source index.
234    ///
235    /// Note: In the case of instructions that are not associated with any particular source file,
236    /// the source mapping assigns an integer identifier of -1. This may happen for bytecode
237    /// sections stemming from compiler-generated inline assembly statements.
238    /// This case is represented as a `None` value.
239    #[inline]
240    pub fn index(&self) -> Option<u32> {
241        if self.index == -1 {
242            None
243        } else {
244            Some(self.index as u32)
245        }
246    }
247
248    /// The source index.
249    ///
250    /// See [`Self::index`] for more information.
251    #[inline]
252    pub fn index_i32(&self) -> i32 {
253        self.index
254    }
255
256    /// Jump instruction.
257    #[inline]
258    pub fn jump(&self) -> Jump {
259        Jump::from_int(self.jump_and_modifier_depth >> 30)
260    }
261
262    #[inline]
263    fn set_jump(&mut self, jump: Jump) {
264        self.set_jump_and_modifier_depth(jump, self.modifier_depth());
265    }
266
267    /// Modifier depth.
268    ///
269    /// This depth is increased whenever the placeholder statement (`_`) is entered in a modifier
270    /// and decreased when it is left again.
271    #[inline]
272    pub fn modifier_depth(&self) -> u32 {
273        (self.jump_and_modifier_depth << 2) >> 2
274    }
275
276    #[inline]
277    fn set_modifier_depth(&mut self, modifier_depth: usize) -> Result<(), SyntaxError> {
278        if modifier_depth > (1 << 30) - 1 {
279            return Err(SyntaxError::new(None, "modifier depth overflow"));
280        }
281        self.set_jump_and_modifier_depth(self.jump(), modifier_depth as u32);
282        Ok(())
283    }
284
285    #[inline]
286    fn set_jump_and_modifier_depth(&mut self, jump: Jump, modifier_depth: u32) {
287        self.jump_and_modifier_depth = (jump.to_int() << 30) | modifier_depth;
288    }
289}
290
291impl fmt::Display for SourceElement {
292    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
293        write!(
294            f,
295            "{}:{}:{}:{}:{}",
296            self.offset(),
297            self.length(),
298            self.index_i32(),
299            self.jump(),
300            self.modifier_depth(),
301        )
302    }
303}
304
/// Collects the fields that were given for one source map element.
///
/// Every field starts out as `None` and stays `None` unless it is set.
#[derive(Default)]
struct SourceElementBuilder {
    /// Start offset, when given.
    offset: Option<usize>,
    /// Range length, when given.
    length: Option<usize>,
    /// Source index, when given; the inner `None` stands for an index of `-1`.
    index: Option<Option<u32>>,
    /// Jump kind, when given.
    jump: Option<Jump>,
    /// Modifier depth, when given.
    modifier_depth: Option<usize>,
}
313
314impl fmt::Display for SourceElementBuilder {
315    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316        if self.offset.is_none()
317            && self.length.is_none()
318            && self.index.is_none()
319            && self.jump.is_none()
320            && self.modifier_depth.is_none()
321        {
322            return Ok(());
323        }
324
325        if let Some(s) = self.offset {
326            if s == 0 && self.index == Some(None) {
327                f.write_str("-1")?;
328            } else {
329                write!(f, "{s}")?;
330            }
331        }
332        if self.length.is_none()
333            && self.index.is_none()
334            && self.jump.is_none()
335            && self.modifier_depth.is_none()
336        {
337            return Ok(());
338        }
339        f.write_char(':')?;
340
341        if let Some(s) = self.length {
342            if s == 0 && self.index == Some(None) {
343                f.write_str("-1")?;
344            } else {
345                write!(f, "{s}")?;
346            }
347        }
348        if self.index.is_none() && self.jump.is_none() && self.modifier_depth.is_none() {
349            return Ok(());
350        }
351        f.write_char(':')?;
352
353        if let Some(s) = self.index {
354            let s = s.map(|s| s as i64).unwrap_or(-1);
355            write!(f, "{s}")?;
356        }
357        if self.jump.is_none() && self.modifier_depth.is_none() {
358            return Ok(());
359        }
360        f.write_char(':')?;
361
362        if let Some(s) = self.jump {
363            write!(f, "{s}")?;
364        }
365        if self.modifier_depth.is_none() {
366            return Ok(());
367        }
368        f.write_char(':')?;
369
370        if let Some(s) = self.modifier_depth {
371            if self.index == Some(None) {
372                f.write_str("-1")?;
373            } else {
374                s.fmt(f)?;
375            }
376        }
377
378        Ok(())
379    }
380}
381
382impl SourceElementBuilder {
383    fn finish(self, prev: Option<SourceElement>) -> Result<SourceElement, SyntaxError> {
384        let mut element = prev.unwrap_or_default();
385        macro_rules! get_field {
386            (| $field:ident | $e:expr) => {
387                if let Some($field) = self.$field {
388                    $e;
389                }
390            };
391        }
392        get_field!(|offset| element.offset = offset.try_into()?);
393        get_field!(|length| element.length = length.try_into()?);
394        get_field!(|index| element.index = index.map(|x| x as i32).unwrap_or(-1));
395        get_field!(|jump| element.set_jump(jump));
396        // Modifier depth is optional.
397        if let Some(modifier_depth) = self.modifier_depth {
398            element.set_modifier_depth(modifier_depth)?;
399        }
400        Ok(element)
401    }
402
403    fn set_jmp(&mut self, jmp: Jump, pos: usize) -> Result<(), SyntaxError> {
404        if self.jump.is_some() {
405            return Err(SyntaxError::new(pos, "jump already set"));
406        }
407        self.jump = Some(jmp);
408        Ok(())
409    }
410
411    fn set_offset(&mut self, offset: usize, pos: usize) -> Result<(), SyntaxError> {
412        if self.offset.is_some() {
413            return Err(SyntaxError::new(pos, "offset already set"));
414        }
415        self.offset = Some(offset);
416        Ok(())
417    }
418
419    fn set_length(&mut self, length: usize, pos: usize) -> Result<(), SyntaxError> {
420        if self.length.is_some() {
421            return Err(SyntaxError::new(pos, "length already set"));
422        }
423        self.length = Some(length);
424        Ok(())
425    }
426
427    fn set_index(&mut self, index: Option<u32>, pos: usize) -> Result<(), SyntaxError> {
428        if self.index.is_some() {
429            return Err(SyntaxError::new(pos, "index already set"));
430        }
431        self.index = Some(index);
432        Ok(())
433    }
434
435    fn set_modifier(&mut self, modifier_depth: usize, pos: usize) -> Result<(), SyntaxError> {
436        if self.modifier_depth.is_some() {
437            return Err(SyntaxError::new(pos, "modifier depth already set"));
438        }
439        self.modifier_depth = Some(modifier_depth);
440        Ok(())
441    }
442}
443
/// Parses a source map string one [`SourceElement`] at a time.
///
/// The parser is an iterator over `Result<SourceElement, SyntaxError>`.
pub struct Parser<'input> {
    /// Token source for the input.
    lexer: Lexer<'input>,
    /// The previously produced element; it is the starting point for the next one.
    last_element: Option<SourceElement>,
    /// Set once the end of the input has been handled (immediately for an empty input).
    done: bool,
    /// Test-only sink that receives a textual rendering of every parsed element.
    #[cfg(test)]
    output: Option<&'input mut dyn Write>,
}
451
452impl<'input> Parser<'input> {
453    pub fn new(input: &'input str) -> Self {
454        Self {
455            done: input.is_empty(),
456            lexer: Lexer::new(input),
457            last_element: None,
458            #[cfg(test)]
459            output: None,
460        }
461    }
462
463    fn advance(&mut self) -> Result<Option<SourceElement>, SyntaxError> {
464        // start parsing at the offset state, `s`
465        let mut state = State::Offset;
466        let mut builder = SourceElementBuilder::default();
467
468        let parse_number = |num: &str, pos: usize| {
469            let num = match num.parse::<i64>() {
470                Ok(num) => num,
471                Err(e) => return Err(SyntaxError::new(pos, e.to_string())),
472            };
473            match num {
474                ..-1 => Err(SyntaxError::new(pos, "unexpected negative number")),
475                -1 => Ok(None),
476                0.. => u32::try_from(num)
477                    .map(Some)
478                    .map_err(|_| SyntaxError::new(pos, "number too large")),
479            }
480        };
481
482        loop {
483            match self.lexer.next() {
484                Some(Ok((token, pos))) => match token {
485                    Token::Semicolon => break,
486                    Token::Number(num) => match state {
487                        State::Offset => {
488                            builder
489                                .set_offset(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
490                        }
491                        State::Length => {
492                            builder
493                                .set_length(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
494                        }
495                        State::Index => {
496                            builder.set_index(parse_number(num, pos)?, pos)?;
497                        }
498                        State::Modifier => builder
499                            .set_modifier(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?,
500                        State::Jmp => {
501                            return Err(SyntaxError::new(pos, "expected jump, found number"));
502                        }
503                    },
504                    Token::Colon => state.advance(pos)?,
505                    Token::In => builder.set_jmp(Jump::In, pos)?,
506                    Token::Out => builder.set_jmp(Jump::Out, pos)?,
507                    Token::Regular => builder.set_jmp(Jump::Regular, pos)?,
508                },
509                Some(Err(err)) => return Err(err),
510                None => {
511                    if self.done {
512                        return Ok(None);
513                    }
514                    self.done = true;
515                    break;
516                }
517            }
518        }
519
520        #[cfg(test)]
521        if let Some(out) = self.output.as_mut() {
522            if self.last_element.is_some() {
523                out.write_char(';').unwrap();
524            }
525            write!(out, "{builder}").unwrap();
526        }
527
528        let element = builder.finish(self.last_element.take())?;
529        self.last_element = Some(element.clone());
530        Ok(Some(element))
531    }
532}
533
534impl Iterator for Parser<'_> {
535    type Item = Result<SourceElement, SyntaxError>;
536
537    fn next(&mut self) -> Option<Self::Item> {
538        self.advance().transpose()
539    }
540}
541
542/// State machine to keep track of separating `:`
543#[derive(Clone, Copy, PartialEq, Eq)]
544enum State {
545    // s
546    Offset,
547    // l
548    Length,
549    // f
550    Index,
551    // j
552    Jmp,
553    // m
554    Modifier,
555}
556
557impl State {
558    fn advance(&mut self, pos: usize) -> Result<(), SyntaxError> {
559        *self = match self {
560            Self::Offset => Self::Length,
561            Self::Length => Self::Index,
562            Self::Index => Self::Jmp,
563            Self::Jmp => Self::Modifier,
564            Self::Modifier => return Err(SyntaxError::new(pos, "unexpected colon")),
565        };
566        Ok(())
567    }
568}
569
570/// Parses a source map.
571pub fn parse(input: &str) -> Result<SourceMap, SyntaxError> {
572    Parser::new(input).collect::<Result<SourceMap, SyntaxError>>().map(|mut v| {
573        v.shrink_to_fit();
574        v
575    })
576}
577
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs [`parse_test_`] and panics with the error message on failure.
    fn parse_test(input: &str) {
        match parse_test_(input) {
            Ok(_) => {}
            Err(e) => panic!("{e}"),
        }
    }

    /// Parses `input` while capturing the parser's test output, and fails when the captured
    /// text differs from `input`.
    fn parse_test_(input: &str) -> Result<SourceMap, SyntaxError> {
        let mut s = String::new();
        let mut p = Parser::new(input);
        // Have the parser write what it parsed into `s`.
        p.output = Some(&mut s);
        let sm = p.collect::<Result<SourceMap, _>>()?;
        if s != input {
            return Err(SyntaxError::new(
                None,
                format!("mismatched output:\n   actual: {s:?}\n expected: {input:?}\n       sm: {sm:#?}"),
            ));
        }
        Ok(sm)
    }

    // An empty input must parse without error.
    #[test]
    fn empty() {
        parse_test("");
    }

    // Every line of the fixture file is parsed as a separate source map.
    #[test]
    fn source_maps() {
        // all source maps from the compiler output test data
        let source_maps = include_str!("../../../../test-data/out-source-maps.txt");

        for (line, s) in source_maps.lines().enumerate() {
            // Report 1-based line numbers.
            let line = line + 1;
            parse_test_(s).unwrap_or_else(|e| panic!("Failed to parse line {line}: {e}\n{s:?}"));
        }
    }

    // The whole fixture file is parsed as a single source map.
    #[test]
    fn cheatcodes() {
        let s = include_str!("../../../../test-data/cheatcodes.sol-sourcemap.txt");
        parse_test(s);
    }

    // https://github.com/foundry-rs/foundry/issues/8986
    #[test]
    fn univ4_deployer() {
        let s = ":::-:0;;1888:10801:91;2615:100;;;2679:3;2615:100;;;;2700:4;2615:100;;;;-1:-1:-1;2615:100:91;;;;2546:169;;;-1:-1:-1;;2546:169:91;;;;;;;;;;;2615:100;2546:169;;;2615:100;2797:101;;;;;;;;;-1:-1:-1;;2797:101:91;;;;;;;;2546:169;2721:177;;;;;;;;;;;;;;;;;;2957:101;1888:10801;2957:101;2797;2957;;;-1:-1:-1;;2957:101:91;;;;356:29:89;2957:101:91;;;;2904:154;;;-1:-1:-1;;2904:154:91;;;;;;;;;;;;-1:-1:-1;;;;;;2904:154:91;;;;;;;;4018:32;;;;;4048:2;4018:32;;;4056:74;;;-1:-1:-1;;;;;4056:74:91;;;;;;;;1888:10801;;;;;;;;;;;;;;;;";
        parse_test(s);
    }
}