// dbin/parser.rs

1use crate::err;
2use crate::Context;
3use crate::Data;
4use crate::Expr;
5use crate::Scope;
6use std::fmt::Debug;
7
/// Error produced when a [`Pattern`] fails to parse its input.
#[derive(Clone, Debug)]
pub enum ParseError {
    /// Free-form failure carrying a human-readable message.
    Other(String),
}
12
13pub enum Pattern {
14    Exact(Vec<u8>), // expect an exact sequence of bytes
15
16    // integral types
17    U8,
18    I8,
19    LeU16,
20    LeU32,
21    LeU64,
22    BeU16,
23    BeU32,
24    BeU64,
25    LeI16,
26    LeI32,
27    LeI64,
28    BeI16,
29    BeI32,
30    BeI64,
31
32    // float types
33    LeF32,
34    LeF64,
35    BeF32,
36    BeF64,
37
38    // null terminated string
39    CStr,
40
41    // Array, with variable length
42    Array(Box<Pattern>, Expr),
43
44    AnyOf(Vec<Pattern>),
45    AllOf(Vec<Pattern>), // results in Seq of patterns
46
47    // pseudo patterns
48    // these change the parsed results and parse state,
49    // but do not directly modify what sequence of bytes
50    // they match
51    Store(Box<Pattern>, i64), // stores the resulting Data into the current scope
52    Map(
53        Box<Pattern>,
54        Box<dyn Fn(&Scope, Data) -> Result<Data, ParseError>>,
55    ),
56}
57
58impl Pattern {
59    pub fn map<F: Fn(&Scope, Data) -> Result<Data, ParseError> + 'static>(self, f: F) -> Pattern {
60        Pattern::Map(Box::new(self), Box::new(f))
61    }
62    pub fn mapval<D: Into<Data>>(self, d: D) -> Pattern {
63        let d = d.into();
64        self.map(move |_, _| Ok(d.clone()))
65    }
66    pub fn store<K: Into<i64>>(self, key: K) -> Pattern {
67        Pattern::Store(self.into(), key.into())
68    }
69    pub fn parse(&self, bytes: &[u8]) -> Result<Data, ParseError> {
70        let mut ctx = Context::new(bytes);
71        self.parse_ctx(&mut ctx)
72    }
73    fn parse_ctx(&self, ctx: &mut Context) -> Result<Data, ParseError> {
74        match self {
75            Pattern::Exact(bytes) => {
76                let peek = ctx.peek(bytes.len())?;
77                if bytes.as_slice() == peek {
78                    Ok(ctx.read(bytes.len())?.into())
79                } else {
80                    Err(ParseError::Other(format!(
81                        "Expected {:?} but got {:?}",
82                        bytes, peek
83                    )))
84                }
85            }
86            Pattern::U8 => Ok((uint(true, ctx.read(1)?) as i64).into()),
87            Pattern::I8 => Ok((sint(true, ctx.read(1)?) as i64).into()),
88            Pattern::LeU16 => Ok((uint(true, ctx.read(2)?) as i64).into()),
89            Pattern::LeU32 => Ok((uint(true, ctx.read(4)?) as i64).into()),
90            Pattern::LeU64 => Ok((uint(true, ctx.read(8)?) as i64).into()),
91            Pattern::BeU16 => Ok((uint(false, ctx.read(2)?) as i64).into()),
92            Pattern::BeU32 => Ok((uint(false, ctx.read(4)?) as i64).into()),
93            Pattern::BeU64 => Ok((uint(false, ctx.read(8)?) as i64).into()),
94            Pattern::LeI16 => Ok((sint(true, ctx.read(2)?) as i64).into()),
95            Pattern::LeI32 => Ok((sint(true, ctx.read(4)?) as i64).into()),
96            Pattern::LeI64 => Ok((sint(true, ctx.read(8)?) as i64).into()),
97            Pattern::BeI16 => Ok((sint(false, ctx.read(2)?) as i64).into()),
98            Pattern::BeI32 => Ok((sint(false, ctx.read(4)?) as i64).into()),
99            Pattern::BeI64 => Ok((sint(false, ctx.read(8)?) as i64).into()),
100            Pattern::LeF32 => Ok((f32::from_bits(uint(true, ctx.read(4)?) as u32) as f64).into()),
101            Pattern::LeF64 => Ok((f64::from_bits(uint(true, ctx.read(8)?) as u64)).into()),
102            Pattern::BeF32 => Ok((f32::from_bits(uint(false, ctx.read(4)?) as u32) as f64).into()),
103            Pattern::BeF64 => Ok((f64::from_bits(uint(false, ctx.read(8)?) as u64)).into()),
104            Pattern::CStr => {
105                let mut bytes = Vec::new();
106                while ctx.peek(1)?[0] != 0 {
107                    bytes.push(ctx.read(1)?[0]);
108                }
109                match std::str::from_utf8(&bytes) {
110                    Ok(s) => Ok(s.into()),
111                    Err(error) => err(format!("{:?}", error)),
112                }
113            }
114            Pattern::Array(pat, expr) => {
115                let len = match expr.eval(ctx.scope())? {
116                    Data::Int(i) => i as usize,
117                    x => return err(format!("Got non-int for array len ({:?})", x)),
118                };
119                let mut ret = Vec::new();
120                for _ in 0..len {
121                    ret.push(pat.parse_ctx(ctx)?);
122                }
123                Ok(ret.into())
124            }
125            Pattern::AnyOf(pats) => {
126                let pos = ctx.save();
127                let mut last = err("Empty 'any-of'");
128                for pat in pats {
129                    last = pat.parse_ctx(ctx);
130                    if last.is_ok() {
131                        return last;
132                    } else {
133                        ctx.restore(pos);
134                    }
135                }
136                last
137            }
138            Pattern::AllOf(pats) => {
139                let mut ret = Vec::new();
140                for pat in pats {
141                    ret.push(pat.parse_ctx(ctx)?);
142                }
143                Ok(ret.into())
144            }
145            Pattern::Store(pat, key) => {
146                let val = pat.parse_ctx(ctx)?;
147                ctx.scope_mut().set(*key, val.clone());
148                Ok(val)
149            }
150            Pattern::Map(pat, f) => {
151                let val = pat.parse_ctx(ctx)?;
152                Ok(f(ctx.scope(), val)?)
153            }
154        }
155    }
156
157    /// convenience method that
158    /// returns a new Pattern mapped by adding the given value
159    /// to the resulting value
160    ///   - numeric types can be added to each other,
161    ///       with two integral types, the result is an intgral value
162    ///       otherwise, you get a Float value
163    ///   - string types can be added to each other
164    ///       to create a concatenated string
165    pub fn add<D: Into<Data>>(self, rhs: D) -> Pattern {
166        let rhs = rhs.into();
167        self.map(move |_, lhs| {
168            let rhs = rhs.clone();
169            match (lhs, rhs) {
170                (Data::Int(a), Data::Int(b)) => Ok((a + b).into()),
171                (Data::Float(a), Data::Float(b)) => Ok((a + b).into()),
172                (Data::Float(a), Data::Int(b)) => Ok((a + b as f64).into()),
173                (Data::Int(a), Data::Float(b)) => Ok((a as f64 + b).into()),
174                (Data::String(a), Data::String(b)) => Ok(format!("{}{}", a, b).into()),
175                (a, b) => err(format!("Could not add given values ({:?}, {:?})", a, b,)),
176            }
177        })
178    }
179
180    /// if self -> X, this changes this pattern so that it returns
181    /// [name, X] instead.
182    /// Primarily for debugging purposes, when you want to see
183    /// what value comes from what pattern
184    pub fn label<D: Into<Data>>(self, name: D) -> Pattern {
185        let name = name.into();
186        self.map(move |_, data| Ok(vec![name.clone(), data].into()))
187    }
188
189    /// convenience method that accepts a list of keys
190    /// and returns a Pattern that when parsed will return a
191    /// map of list of (name, value) pairs, where the names
192    /// are generated from Debug of the keys and value comes from
193    /// lookup up the Scope
194    pub fn to_map<K: Into<i64> + Debug>(self, keys: Vec<K>) -> Pattern {
195        let pairs: Vec<_> = keys
196            .into_iter()
197            .map(|k| {
198                let s: Data = format!("{:?}", k).into();
199                let k: i64 = k.into();
200                (k, s)
201            })
202            .collect();
203        self.map(move |scope, _| {
204            let mut ret = Vec::new();
205            for (key, keystr) in pairs.clone() {
206                let val: Data = scope.get_or_error(key)?.clone();
207                let key: Data = keystr.into();
208                let pair: Data = vec![key, val].into();
209                ret.push(pair);
210            }
211            Ok(ret.into())
212        })
213    }
214}
215
/// Decodes `bytes` as an unsigned integer in the requested byte order.
///
/// Bytes are folded in from most significant to least significant; an empty
/// slice yields 0, and anything shifted past 64 bits is dropped.
fn uint(little_endian: bool, bytes: &[u8]) -> u64 {
    let append = |acc: u64, byte: &u8| (acc << 8) + u64::from(*byte);
    if little_endian {
        // Little-endian stores the most significant byte last.
        bytes.iter().rev().fold(0, append)
    } else {
        bytes.iter().fold(0, append)
    }
}
231
/// Decodes `bytes` as a two's-complement signed integer in the requested
/// byte order and sign-extends it to `i64`.
///
/// An empty slice yields 0. If more than eight bytes are given, only the
/// eight least-significant ones contribute to the result.
fn sint(little_endian: bool, bytes: &[u8]) -> i64 {
    const WIDTH: usize = 8;

    // Keep only the bytes that can influence a 64-bit result.
    let low = if bytes.len() <= WIDTH {
        bytes
    } else if little_endian {
        &bytes[..WIDTH]
    } else {
        &bytes[bytes.len() - WIDTH..]
    };

    // The sign lives in the top bit of the most significant byte.
    let top = if little_endian { low.last() } else { low.first() };
    let negative = match top {
        Some(byte) => (*byte & 0x80) != 0,
        None => return 0,
    };

    // Sign-extend by pre-filling the unused high-order bytes. This avoids
    // negating a magnitude, which overflows for `i64::MIN`.
    let fill: u8 = if negative { 0xFF } else { 0x00 };
    let mut buf = [fill; WIDTH];
    if little_endian {
        buf[..low.len()].copy_from_slice(low);
        i64::from_le_bytes(buf)
    } else {
        buf[WIDTH - low.len()..].copy_from_slice(low);
        i64::from_be_bytes(buf)
    }
}