// foundry_compilers_artifacts_solc/sourcemap.rs

use std::{fmt, fmt::Write, iter::Peekable, str::CharIndices};

/// The kind of jump attached to an instruction in a source map.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Jump {
    /// A jump instruction that goes into a function
    In,
    /// A jump instruction that returns from a function
    Out,
    /// A regular jump instruction
    Regular,
}

impl Jump {
    /// Returns the string representation of the jump instruction.
    ///
    /// This is the single-character marker used in the textual form: `i`, `o` or `-`.
    pub const fn to_str(self) -> &'static str {
        match self {
            Self::In => "i",
            Self::Out => "o",
            Self::Regular => "-",
        }
    }

    /// Encodes the jump as an integer in `0..=2`; the inverse of [`Self::from_int`].
    const fn to_int(self) -> u32 {
        match self {
            Self::In => 0,
            Self::Out => 1,
            Self::Regular => 2,
        }
    }

    /// Decodes an integer produced by [`Self::to_int`].
    ///
    /// # Panics
    ///
    /// Panics if `i` is not `0`, `1` or `2`.
    fn from_int(i: u32) -> Self {
        match i {
            0 => Self::In,
            1 => Self::Out,
            2 => Self::Regular,
            _ => unreachable!("invalid jump encoding: {i}"),
        }
    }
}

impl fmt::Display for Jump {
    /// Writes the same marker that [`Jump::to_str`] returns.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.to_str())
    }
}

47/// An error that can happen during source map parsing.
48#[derive(Debug, thiserror::Error)]
49pub struct SyntaxError(Box<SyntaxErrorInner>);
50
51#[derive(Debug)]
52struct SyntaxErrorInner {
53    pos: Option<usize>,
54    msg: String,
55}
56
57impl fmt::Display for SyntaxError {
58    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59        f.write_str("failed to parse source map: ")?;
60        if let Some(pos) = self.0.pos {
61            write!(f, "[{pos}] ")?;
62        }
63        f.write_str(&self.0.msg)
64    }
65}
66
67impl SyntaxError {
68    fn new(pos: impl Into<Option<usize>>, msg: impl Into<String>) -> Self {
69        Self(Box::new(SyntaxErrorInner { pos: pos.into(), msg: msg.into() }))
70    }
71}
72
73impl From<std::num::TryFromIntError> for SyntaxError {
74    fn from(_value: std::num::TryFromIntError) -> Self {
75        Self::new(None, "offset overflow")
76    }
77}
78
/// A lexical token of the textual source map format.
#[derive(PartialEq, Eq)]
enum Token<'a> {
    /// Decimal number
    Number(&'a str),
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `i` which represents an instruction that goes into a function
    In,
    /// `o` which represents an instruction that returns from a function
    Out,
    /// `-` regular jump
    Regular,
}

impl fmt::Debug for Token<'_> {
    /// Writes an upper-case tag for the token; numbers include their quoted text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Number(s) => write!(f, "NUMBER({s:?})"),
            Token::Semicolon => f.write_str("SEMICOLON"),
            Token::Colon => f.write_str("COLON"),
            Token::In => f.write_str("JMP(i)"),
            Token::Out => f.write_str("JMP(o)"),
            Token::Regular => f.write_str("JMP(-)"),
        }
    }
}

impl fmt::Display for Token<'_> {
    /// Writes a short lower-case description of the token kind (the number's value is omitted).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Token::Number(_) => "number",
            Token::Semicolon => "`;`",
            Token::Colon => "`:`",
            Token::In => "jmp-in",
            Token::Out => "jmp-out",
            Token::Regular => "jmp",
        })
    }
}

121struct Lexer<'input> {
122    input: &'input str,
123    chars: Peekable<CharIndices<'input>>,
124}
125
126impl<'input> Lexer<'input> {
127    fn new(input: &'input str) -> Self {
128        Lexer { chars: input.char_indices().peekable(), input }
129    }
130
131    fn number(&mut self, start: usize, mut end: usize) -> Token<'input> {
132        loop {
133            if let Some((_, ch)) = self.chars.peek().copied() {
134                if !ch.is_ascii_digit() {
135                    break;
136                }
137                self.chars.next();
138                end += 1;
139            } else {
140                end = self.input.len();
141                break;
142            }
143        }
144        Token::Number(&self.input[start..end])
145    }
146}
147
148impl<'input> Iterator for Lexer<'input> {
149    type Item = Result<(Token<'input>, usize), SyntaxError>;
150
151    fn next(&mut self) -> Option<Self::Item> {
152        let (start, ch) = self.chars.next()?;
153        let token = match ch {
154            ';' => Token::Semicolon,
155            ':' => Token::Colon,
156            'i' => Token::In,
157            'o' => Token::Out,
158            '-' => match self.chars.peek() {
159                Some((_, ch)) if ch.is_ascii_digit() => {
160                    self.chars.next();
161                    self.number(start, start + 2)
162                }
163                _ => Token::Regular,
164            },
165            ch if ch.is_ascii_digit() => self.number(start, start + 1),
166            ch => return Some(Err(SyntaxError::new(start, format!("unexpected character: {ch}")))),
167        };
168        Some(Ok((token, start)))
169    }
170}
171
172/// A Solidity source map, which is composed of multiple [`SourceElement`]s, separated by
173/// semicolons.
174///
175/// Solidity reference: <https://docs.soliditylang.org/en/latest/internals/source_mappings.html#source-mappings>
176pub type SourceMap = Vec<SourceElement>;
177
178/// A single element in a [`SourceMap`].
179///
180/// Solidity reference: <https://docs.soliditylang.org/en/latest/internals/source_mappings.html#source-mappings>
181#[derive(Clone, PartialEq, Eq, Hash)]
182pub struct SourceElement {
183    offset: u32,
184    length: u32,
185    index: i32,
186    // 2 bits for jump, 30 bits for modifier depth; see [set_jump_and_modifier_depth]
187    jump_and_modifier_depth: u32,
188}
189
190impl fmt::Debug for SourceElement {
191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192        f.debug_struct("SourceElement")
193            .field("offset", &self.offset())
194            .field("length", &self.length())
195            .field("index", &self.index_i32())
196            .field("jump", &self.jump())
197            .field("modifier_depth", &self.modifier_depth())
198            .field("formatted", &format_args!("{self}"))
199            .finish()
200    }
201}
202
203impl Default for SourceElement {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
209impl SourceElement {
210    /// Creates a new source element with default values.
211    pub const fn new() -> Self {
212        Self { offset: 0, length: 0, index: -1, jump_and_modifier_depth: 0 }
213    }
214
215    /// Creates a new source element with default values.
216    #[deprecated = "use `new` instead"]
217    pub const fn new_invalid() -> Self {
218        Self::new()
219    }
220
221    /// The byte-offset to the start of the range in the source file.
222    #[inline]
223    pub const fn offset(&self) -> u32 {
224        self.offset
225    }
226
227    /// The length of the source range in bytes.
228    #[inline]
229    pub const fn length(&self) -> u32 {
230        self.length
231    }
232
233    /// The source index.
234    ///
235    /// Note: In the case of instructions that are not associated with any particular source file,
236    /// the source mapping assigns an integer identifier of -1. This may happen for bytecode
237    /// sections stemming from compiler-generated inline assembly statements.
238    /// This case is represented as a `None` value.
239    #[inline]
240    pub const fn index(&self) -> Option<u32> {
241        if self.index == -1 { None } else { Some(self.index as u32) }
242    }
243
244    /// The source index.
245    ///
246    /// See [`Self::index`] for more information.
247    #[inline]
248    pub const fn index_i32(&self) -> i32 {
249        self.index
250    }
251
252    /// Jump instruction.
253    #[inline]
254    pub fn jump(&self) -> Jump {
255        Jump::from_int(self.jump_and_modifier_depth >> 30)
256    }
257
258    #[inline]
259    const fn set_jump(&mut self, jump: Jump) {
260        self.set_jump_and_modifier_depth(jump, self.modifier_depth());
261    }
262
263    /// Modifier depth.
264    ///
265    /// This depth is increased whenever the placeholder statement (`_`) is entered in a modifier
266    /// and decreased when it is left again.
267    #[inline]
268    pub const fn modifier_depth(&self) -> u32 {
269        (self.jump_and_modifier_depth << 2) >> 2
270    }
271
272    #[inline]
273    fn set_modifier_depth(&mut self, modifier_depth: usize) -> Result<(), SyntaxError> {
274        if modifier_depth > (1 << 30) - 1 {
275            return Err(SyntaxError::new(None, "modifier depth overflow"));
276        }
277        self.set_jump_and_modifier_depth(self.jump(), modifier_depth as u32);
278        Ok(())
279    }
280
281    #[inline]
282    const fn set_jump_and_modifier_depth(&mut self, jump: Jump, modifier_depth: u32) {
283        self.jump_and_modifier_depth = (jump.to_int() << 30) | modifier_depth;
284    }
285}
286
287impl fmt::Display for SourceElement {
288    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
289        write!(
290            f,
291            "{}:{}:{}:{}:{}",
292            self.offset(),
293            self.length(),
294            self.index_i32(),
295            self.jump(),
296            self.modifier_depth(),
297        )
298    }
299}
300
301#[derive(Default)]
302struct SourceElementBuilder {
303    offset: Option<usize>,
304    length: Option<usize>,
305    index: Option<Option<u32>>,
306    jump: Option<Jump>,
307    modifier_depth: Option<usize>,
308}
309
310impl fmt::Display for SourceElementBuilder {
311    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
312        if self.offset.is_none()
313            && self.length.is_none()
314            && self.index.is_none()
315            && self.jump.is_none()
316            && self.modifier_depth.is_none()
317        {
318            return Ok(());
319        }
320
321        if let Some(s) = self.offset {
322            if s == 0 && self.index == Some(None) {
323                f.write_str("-1")?;
324            } else {
325                write!(f, "{s}")?;
326            }
327        }
328        if self.length.is_none()
329            && self.index.is_none()
330            && self.jump.is_none()
331            && self.modifier_depth.is_none()
332        {
333            return Ok(());
334        }
335        f.write_char(':')?;
336
337        if let Some(s) = self.length {
338            if s == 0 && self.index == Some(None) {
339                f.write_str("-1")?;
340            } else {
341                write!(f, "{s}")?;
342            }
343        }
344        if self.index.is_none() && self.jump.is_none() && self.modifier_depth.is_none() {
345            return Ok(());
346        }
347        f.write_char(':')?;
348
349        if let Some(s) = self.index {
350            let s = s.map(|s| s as i64).unwrap_or(-1);
351            write!(f, "{s}")?;
352        }
353        if self.jump.is_none() && self.modifier_depth.is_none() {
354            return Ok(());
355        }
356        f.write_char(':')?;
357
358        if let Some(s) = self.jump {
359            write!(f, "{s}")?;
360        }
361        if self.modifier_depth.is_none() {
362            return Ok(());
363        }
364        f.write_char(':')?;
365
366        if let Some(s) = self.modifier_depth {
367            if self.index == Some(None) {
368                f.write_str("-1")?;
369            } else {
370                s.fmt(f)?;
371            }
372        }
373
374        Ok(())
375    }
376}
377
378impl SourceElementBuilder {
379    fn finish(self, prev: Option<SourceElement>) -> Result<SourceElement, SyntaxError> {
380        let mut element = prev.unwrap_or_default();
381        macro_rules! get_field {
382            (| $field:ident | $e:expr) => {
383                if let Some($field) = self.$field {
384                    $e;
385                }
386            };
387        }
388        get_field!(|offset| element.offset = offset.try_into()?);
389        get_field!(|length| element.length = length.try_into()?);
390        get_field!(|index| element.index = index.map(|x| x as i32).unwrap_or(-1));
391        get_field!(|jump| element.set_jump(jump));
392        // Modifier depth is optional.
393        if let Some(modifier_depth) = self.modifier_depth {
394            element.set_modifier_depth(modifier_depth)?;
395        }
396        Ok(element)
397    }
398
399    fn set_jmp(&mut self, jmp: Jump, pos: usize) -> Result<(), SyntaxError> {
400        if self.jump.is_some() {
401            return Err(SyntaxError::new(pos, "jump already set"));
402        }
403        self.jump = Some(jmp);
404        Ok(())
405    }
406
407    fn set_offset(&mut self, offset: usize, pos: usize) -> Result<(), SyntaxError> {
408        if self.offset.is_some() {
409            return Err(SyntaxError::new(pos, "offset already set"));
410        }
411        self.offset = Some(offset);
412        Ok(())
413    }
414
415    fn set_length(&mut self, length: usize, pos: usize) -> Result<(), SyntaxError> {
416        if self.length.is_some() {
417            return Err(SyntaxError::new(pos, "length already set"));
418        }
419        self.length = Some(length);
420        Ok(())
421    }
422
423    fn set_index(&mut self, index: Option<u32>, pos: usize) -> Result<(), SyntaxError> {
424        if self.index.is_some() {
425            return Err(SyntaxError::new(pos, "index already set"));
426        }
427        self.index = Some(index);
428        Ok(())
429    }
430
431    fn set_modifier(&mut self, modifier_depth: usize, pos: usize) -> Result<(), SyntaxError> {
432        if self.modifier_depth.is_some() {
433            return Err(SyntaxError::new(pos, "modifier depth already set"));
434        }
435        self.modifier_depth = Some(modifier_depth);
436        Ok(())
437    }
438}
439
440pub struct Parser<'input> {
441    lexer: Lexer<'input>,
442    last_element: Option<SourceElement>,
443    done: bool,
444    #[cfg(test)]
445    output: Option<&'input mut dyn Write>,
446}
447
448impl<'input> Parser<'input> {
449    pub fn new(input: &'input str) -> Self {
450        Self {
451            done: input.is_empty(),
452            lexer: Lexer::new(input),
453            last_element: None,
454            #[cfg(test)]
455            output: None,
456        }
457    }
458
459    fn advance(&mut self) -> Result<Option<SourceElement>, SyntaxError> {
460        // start parsing at the offset state, `s`
461        let mut state = State::Offset;
462        let mut builder = SourceElementBuilder::default();
463
464        let parse_number = |num: &str, pos: usize| {
465            let num = match num.parse::<i64>() {
466                Ok(num) => num,
467                Err(e) => return Err(SyntaxError::new(pos, e.to_string())),
468            };
469            match num {
470                ..-1 => Err(SyntaxError::new(pos, "unexpected negative number")),
471                -1 => Ok(None),
472                0.. => u32::try_from(num)
473                    .map(Some)
474                    .map_err(|_| SyntaxError::new(pos, "number too large")),
475            }
476        };
477
478        loop {
479            match self.lexer.next() {
480                Some(Ok((token, pos))) => match token {
481                    Token::Semicolon => break,
482                    Token::Number(num) => match state {
483                        State::Offset => {
484                            builder
485                                .set_offset(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
486                        }
487                        State::Length => {
488                            builder
489                                .set_length(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?;
490                        }
491                        State::Index => {
492                            builder.set_index(parse_number(num, pos)?, pos)?;
493                        }
494                        State::Modifier => builder
495                            .set_modifier(parse_number(num, pos)?.unwrap_or(0) as usize, pos)?,
496                        State::Jmp => {
497                            return Err(SyntaxError::new(pos, "expected jump, found number"));
498                        }
499                    },
500                    Token::Colon => state.advance(pos)?,
501                    Token::In => builder.set_jmp(Jump::In, pos)?,
502                    Token::Out => builder.set_jmp(Jump::Out, pos)?,
503                    Token::Regular => builder.set_jmp(Jump::Regular, pos)?,
504                },
505                Some(Err(err)) => return Err(err),
506                None => {
507                    if self.done {
508                        return Ok(None);
509                    }
510                    self.done = true;
511                    break;
512                }
513            }
514        }
515
516        #[cfg(test)]
517        if let Some(out) = self.output.as_mut() {
518            if self.last_element.is_some() {
519                out.write_char(';').unwrap();
520            }
521            write!(out, "{builder}").unwrap();
522        }
523
524        let element = builder.finish(self.last_element.take())?;
525        self.last_element = Some(element.clone());
526        Ok(Some(element))
527    }
528}
529
530impl Iterator for Parser<'_> {
531    type Item = Result<SourceElement, SyntaxError>;
532
533    fn next(&mut self) -> Option<Self::Item> {
534        self.advance().transpose()
535    }
536}
537
538/// State machine to keep track of separating `:`
539#[derive(Clone, Copy, PartialEq, Eq)]
540enum State {
541    // s
542    Offset,
543    // l
544    Length,
545    // f
546    Index,
547    // j
548    Jmp,
549    // m
550    Modifier,
551}
552
553impl State {
554    fn advance(&mut self, pos: usize) -> Result<(), SyntaxError> {
555        *self = match self {
556            Self::Offset => Self::Length,
557            Self::Length => Self::Index,
558            Self::Index => Self::Jmp,
559            Self::Jmp => Self::Modifier,
560            Self::Modifier => return Err(SyntaxError::new(pos, "unexpected colon")),
561        };
562        Ok(())
563    }
564}
565
566/// Parses a source map.
567pub fn parse(input: &str) -> Result<SourceMap, SyntaxError> {
568    Parser::new(input).collect::<Result<SourceMap, SyntaxError>>().map(|mut v| {
569        v.shrink_to_fit();
570        v
571    })
572}
573
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and panics with the error message on failure.
    fn parse_test(input: &str) {
        match parse_test_(input) {
            Ok(_) => {}
            Err(e) => panic!("{e}"),
        }
    }

    /// Parses `input` while capturing the parser's re-serialized output, and fails if that
    /// output differs from `input`.
    fn parse_test_(input: &str) -> Result<SourceMap, SyntaxError> {
        let mut s = String::new();
        let mut p = Parser::new(input);
        p.output = Some(&mut s);
        let sm = p.collect::<Result<SourceMap, _>>()?;
        if s != input {
            return Err(SyntaxError::new(
                None,
                format!(
                    "mismatched output:\n   actual: {s:?}\n expected: {input:?}\n       sm: {sm:#?}"
                ),
            ));
        }
        Ok(sm)
    }

    #[test]
    fn empty() {
        parse_test("");
    }

    #[test]
    fn source_maps() {
        // all source maps from the compiler output test data
        let source_maps = include_str!("../../../../test-data/out-source-maps.txt");

        for (line, s) in source_maps.lines().enumerate() {
            let line = line + 1;
            parse_test_(s).unwrap_or_else(|e| panic!("Failed to parse line {line}: {e}\n{s:?}"));
        }
    }

    #[test]
    fn cheatcodes() {
        let s = include_str!("../../../../test-data/cheatcodes.sol-sourcemap.txt");
        parse_test(s);
    }

    // https://github.com/foundry-rs/foundry/issues/8986
    #[test]
    fn univ4_deployer() {
        let s = ":::-:0;;1888:10801:91;2615:100;;;2679:3;2615:100;;;;2700:4;2615:100;;;;-1:-1:-1;2615:100:91;;;;2546:169;;;-1:-1:-1;;2546:169:91;;;;;;;;;;;2615:100;2546:169;;;2615:100;2797:101;;;;;;;;;-1:-1:-1;;2797:101:91;;;;;;;;2546:169;2721:177;;;;;;;;;;;;;;;;;;2957:101;1888:10801;2957:101;2797;2957;;;-1:-1:-1;;2957:101:91;;;;356:29:89;2957:101:91;;;;2904:154;;;-1:-1:-1;;2904:154:91;;;;;;;;;;;;-1:-1:-1;;;;;;2904:154:91;;;;;;;;4018:32;;;;;4048:2;4018:32;;;4056:74;;;-1:-1:-1;;;;;4056:74:91;;;;;;;;1888:10801;;;;;;;;;;;;;;;;";
        parse_test(s);
    }
}