fugue_fspec/
fspec.rs

1use std::borrow::{Borrow, Cow};
2use std::collections::btree_map::Entry;
3use std::collections::{BTreeMap, BTreeSet};
4use std::fs::File;
5use std::io::{self, BufReader, Read};
6use std::path::{Path, PathBuf};
7
8use bitflags::bitflags;
9
10use fugue_ir::disassembly::{IRBuilderArena, PCodeBlock};
11use fugue_ir::Translator;
12use fugue_sleigh::{CodeBlock, IRBuilder, IRBuilderError};
13
14use serde::de::value::StringDeserializer;
15use serde::ser::SerializeSeq;
16use serde::{Deserialize, Deserializer, Serialize, Serializer};
17
18use thiserror::Error;
19
20use crate::common::{
21    AttrOptWithVal, AttrWithVal, GroupOrValue, GroupOrValueVisitor, Language, OneOrMany,
22};
23use crate::pattern::PatternsWithContext;
24
bitflags! {
    /// Compact bit set of the properties attached to a function specification.
    ///
    /// The textual spec format lists properties by name (see `FunctionProperty`);
    /// those names are folded into this set when a spec is loaded.
    #[derive(
        Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize
    )]
    #[repr(transparent)]
    pub struct FunctionProperties: u8 {
        /// Set by the `non-returning` property.
        const NON_RETURNING = 0b0000_0001;

        /// Set by the `tail` property (and by `return-thunk`, see below).
        const TAIL = 0b0000_0010;
        /// Alias of `TAIL`: both names denote the same bit.
        const RETURN_THUNK = Self::TAIL.bits();
    }
}
37
/// A single named property as it appears in a serialized function spec.
///
/// Variant names are (de)serialized in kebab-case, i.e. `non-returning`,
/// `return-thunk` and `tail`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
enum FunctionProperty {
    // Maps to `FunctionProperties::NON_RETURNING`.
    NonReturning,
    // Maps to `FunctionProperties::TAIL` (same bit as `RETURN_THUNK`).
    ReturnThunk,
    // Maps to `FunctionProperties::TAIL`.
    Tail,
}
45
46impl From<OneOrMany<FunctionProperty>> for FunctionProperties {
47    fn from(value: OneOrMany<FunctionProperty>) -> Self {
48        Vec::from(value).into()
49    }
50}
51
52impl From<Vec<FunctionProperty>> for FunctionProperties {
53    fn from(values: Vec<FunctionProperty>) -> Self {
54        let mut props = FunctionProperties::empty();
55        for value in values {
56            match value {
57                FunctionProperty::NonReturning => {
58                    props.insert(FunctionProperties::NON_RETURNING);
59                }
60                FunctionProperty::Tail | FunctionProperty::ReturnThunk => {
61                    props.insert(FunctionProperties::TAIL);
62                }
63            }
64        }
65        props
66    }
67}
68
69impl From<FunctionProperties> for Vec<FunctionProperty> {
70    fn from(value: FunctionProperties) -> Self {
71        let mut props = Vec::new();
72        for prop in value.iter() {
73            match prop {
74                FunctionProperties::NON_RETURNING => {
75                    props.push(FunctionProperty::NonReturning);
76                }
77                FunctionProperties::TAIL => {
78                    props.push(FunctionProperty::Tail);
79                }
80                _ => (),
81            }
82        }
83        props
84    }
85}
86
87impl From<FunctionProperties> for OneOrMany<FunctionProperty> {
88    fn from(value: FunctionProperties) -> Self {
89        Vec::from(value).into()
90    }
91}
92
/// A single constraint restricting where a function specification applies.
///
/// In serialized form a constraint is written as `arch: <language>` or
/// `platform: <name>`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PlatformConstraint {
    /// Constraint on the architecture, expressed as a [`Language`].
    Arch(Language),
    /// Constraint on the platform, expressed as a free-form name (e.g. `posix`).
    Platform(String),
}
98
99impl<'de> Deserialize<'de> for PlatformConstraint {
100    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
101    where
102        D: Deserializer<'de>,
103    {
104        let av = AttrWithVal::<String, String>::deserialize(deserializer)?;
105        match av.attr.as_ref() {
106            "arch" => {
107                Ok(Self::Arch(Language::deserialize(StringDeserializer::new(av.val))?))
108            }
109            "platform" => {
110                Ok(Self::Platform(av.val))
111            }
112            _ => {
113                Err(<D::Error as serde::de::Error>::custom(
114                    "invalid arch/platform constraint (should be of the form arch: ... or platform: ...)",
115                ))
116            }
117        }
118    }
119}
120
121impl Serialize for PlatformConstraint {
122    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
123    where
124        S: Serializer,
125    {
126        match self {
127            Self::Arch(arch) => AttrWithVal {
128                attr: "arch",
129                val: arch,
130            }
131            .serialize(serializer),
132            Self::Platform(platform) => AttrWithVal {
133                attr: "platform",
134                val: platform,
135            }
136            .serialize(serializer),
137        }
138    }
139}
140
/// Byte patterns used to recognise a function.
///
/// Patterns are either keyed by a specific [`Language`] or kept in a
/// language-independent default list.
#[derive(Clone, Default)]
pub struct FunctionPatterns {
    // Patterns that were tagged with a language in the serialized form.
    languages: BTreeMap<Language, Vec<PatternsWithContext>>,
    // Patterns that were given without a language tag.
    default: Vec<PatternsWithContext>,
}
146
147impl<'de> Deserialize<'de> for FunctionPatterns {
148    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
149    where
150        D: Deserializer<'de>,
151    {
152        let d = Vec::<AttrOptWithVal<Language, PatternsWithContext>>::deserialize(deserializer)?;
153        let mut languages = BTreeMap::new();
154        let mut default = Vec::new();
155
156        for d in d.into_iter() {
157            let (k, v) = match d {
158                AttrOptWithVal::Val(v) => {
159                    default.push(v);
160                    continue;
161                }
162                AttrOptWithVal::AttrWithVal(av) => (av.attr, av.val),
163            };
164
165            match languages.entry(k) {
166                Entry::Vacant(entry) => {
167                    entry.insert(vec![v]);
168                }
169                Entry::Occupied(mut entry) => {
170                    entry.get_mut().push(v);
171                }
172            }
173        }
174
175        Ok(Self { languages, default })
176    }
177}
178
179impl Serialize for FunctionPatterns {
180    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
181    where
182        S: Serializer,
183    {
184        let mut seq = serializer.serialize_seq(None)?;
185        for (k, v) in self.languages.iter() {
186            seq.serialize_element(&AttrOptWithVal::AttrWithVal(AttrWithVal {
187                attr: k,
188                val: v,
189            }))?;
190        }
191        for v in self.default.iter() {
192            seq.serialize_element(&AttrOptWithVal::<Language, _>::Val(v))?;
193        }
194        seq.end()
195    }
196}
197
198impl FunctionPatterns {
199    pub fn matches(&self, language: impl Borrow<Language>, bytes: impl AsRef<[u8]>) -> bool {
200        let bytes = bytes.as_ref();
201        self.languages
202            .get(language.borrow())
203            .into_iter()
204            .flatten()
205            .chain(self.default.iter())
206            .any(|pat| pat.matches_exact(bytes))
207    }
208
209    pub fn len(&self) -> usize {
210        self.languages.len() + !self.default.is_empty() as usize
211    }
212}
213
/// Ordered, duplicate-free set of alternative names for a function.
pub type FunctionSpecAliases = BTreeSet<String>;
215
/// Specification of a single function: its names, where it applies, its
/// properties, the byte patterns that identify it and an optional fixup stub.
///
/// (De)serialization goes through `FunctionSpecRepr`, which defines the
/// on-disk field names and defaults.
#[derive(Clone)]
pub struct FunctionSpec {
    // Primary name of the function.
    name: String,
    // Additional names for the same function.
    aliases: FunctionSpecAliases,
    // Serialized under the key `where`; `None` means the spec is unconstrained.
    constraints: Option<GroupOrValue<PlatformConstraint>>,
    // Bit set built from the listed property names.
    properties: FunctionProperties,
    // Serialized as `patterns` (alias `pattern`).
    patterns: FunctionPatterns,
    // Serialized as `fixup` (alias `stub`).
    fixup: Option<FunctionStub>,
}
225
/// A fixup stub: source text together with the `CodeBlock` parsed from it.
#[derive(Clone)]
pub struct FunctionStub {
    // Original source text; this is what gets serialized.
    source: String,
    // Result of `CodeBlock::parse` on `source`, computed at deserialization time.
    ast: CodeBlock,
}
231
232impl<'de> Deserialize<'de> for FunctionStub {
233    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
234    where
235        D: Deserializer<'de>,
236    {
237        let source = String::deserialize(deserializer)?;
238        let ast =
239            CodeBlock::parse(&source).map_err(|e| <D::Error as serde::de::Error>::custom(e))?;
240
241        Ok(Self { source, ast })
242    }
243}
244
245impl Serialize for FunctionStub {
246    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
247    where
248        S: Serializer,
249    {
250        self.source.serialize(serializer)
251    }
252}
253
254impl FunctionStub {
255    pub fn ast(&self) -> &CodeBlock {
256        &self.ast
257    }
258
259    pub fn source(&self) -> &str {
260        &self.source
261    }
262
263    pub fn to_pcode<'ir>(
264        &self,
265        translator: &Translator,
266        irb: &'ir IRBuilderArena,
267    ) -> Result<PCodeBlock<'ir>, IRBuilderError> {
268        let mut builder = IRBuilder::new(translator);
269        builder.translate_parsed(irb, self.ast())
270    }
271}
272
/// Serialized shape of a [`FunctionSpec`].
///
/// Fields are `Cow`s so that serialization can borrow from an existing
/// `FunctionSpec` while deserialization produces owned values. Every field
/// except `name` falls back to its default when absent.
#[derive(Deserialize, Serialize)]
struct FunctionSpecRepr<'a> {
    name: Cow<'a, str>,
    #[serde(default)]
    aliases: Cow<'a, FunctionSpecAliases>,
    // Written and read under the key `where`.
    #[serde(default, rename = "where")]
    constraints: Cow<'a, Option<GroupOrValue<PlatformConstraint>>>,
    // Listed by name here; converted to/from the `FunctionProperties` bit set.
    #[serde(default)]
    properties: Cow<'a, OneOrMany<FunctionProperty>>,
    // `pattern` is accepted as an alternative key on input.
    #[serde(default, alias = "pattern")]
    patterns: Cow<'a, FunctionPatterns>,
    // `stub` is accepted as an alternative key on input.
    #[serde(default, alias = "stub")]
    fixup: Cow<'a, Option<FunctionStub>>,
}
287
288impl<'de> Deserialize<'de> for FunctionSpec {
289    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
290    where
291        D: Deserializer<'de>,
292    {
293        let d = FunctionSpecRepr::deserialize(deserializer)?;
294
295        Ok(Self {
296            name: d.name.into_owned(),
297            aliases: d.aliases.into_owned(),
298            constraints: d.constraints.into_owned(),
299            properties: d.properties.into_owned().into(),
300            patterns: d.patterns.into_owned(),
301            fixup: d.fixup.into_owned(),
302        })
303    }
304}
305
306impl Serialize for FunctionSpec {
307    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
308    where
309        S: Serializer,
310    {
311        let s = FunctionSpecRepr {
312            name: Cow::Borrowed(&self.name),
313            aliases: Cow::Borrowed(&self.aliases),
314            constraints: Cow::Borrowed(&self.constraints),
315            properties: Cow::Owned(self.properties.into()),
316            patterns: Cow::Borrowed(&self.patterns),
317            fixup: Cow::Borrowed(&self.fixup),
318        };
319
320        s.serialize(serializer)
321    }
322}
323
324impl FunctionSpec {
325    pub fn name(&self) -> &str {
326        &self.name
327    }
328
329    pub fn aliases(&self) -> &FunctionSpecAliases {
330        &self.aliases
331    }
332
333    pub fn properties(&self) -> FunctionProperties {
334        self.properties
335    }
336
337    pub fn patterns(&self) -> &FunctionPatterns {
338        &self.patterns
339    }
340
341    pub fn fixup(&self) -> Option<&FunctionStub> {
342        self.fixup.as_ref()
343    }
344
345    pub fn matches<V>(&self, visitor: &V) -> bool
346    where
347        V: GroupOrValueVisitor<PlatformConstraint>,
348    {
349        self.constraints
350            .as_ref()
351            .map(|c| c.matches(visitor))
352            .unwrap_or(true)
353    }
354}
355
/// A collection of function specifications with optional metadata.
///
/// Field names are used as-is for (de)serialization. Note that the
/// collection-level constraint key is `constraints`, whereas an individual
/// `FunctionSpec` uses `where`.
#[derive(Clone, Deserialize, Serialize)]
pub struct FunctionSpecs {
    // Optional free-form author string.
    author: Option<String>,
    // Optional free-form description.
    description: Option<String>,
    // Constraints applying to the collection as a whole; `None` means unconstrained.
    constraints: Option<GroupOrValue<PlatformConstraint>>,
    // The individual function specifications.
    functions: Vec<FunctionSpec>,
}
363
364#[derive(Debug, Error)]
365pub enum FunctionSpecError {
366    #[error("cannot parse function specifications: {0}")]
367    Parse(serde_yaml::Error),
368    #[error("cannot function specifications from `{0}`: {1}")]
369    ParseFile(PathBuf, serde_yaml::Error),
370    #[error("cannot parse function specifications from `{0}`: {1}")]
371    ReadFile(PathBuf, io::Error),
372}
373
374impl FunctionSpecs {
375    pub fn from_str(input: impl AsRef<str>) -> Result<Self, FunctionSpecError> {
376        serde_yaml::from_str(input.as_ref()).map_err(FunctionSpecError::Parse)
377    }
378
379    pub fn from_reader(reader: impl Read) -> Result<Self, FunctionSpecError> {
380        serde_yaml::from_reader(reader).map_err(FunctionSpecError::Parse)
381    }
382
383    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, FunctionSpecError> {
384        let path = path.as_ref();
385        let file = BufReader::new(
386            File::open(path).map_err(|e| FunctionSpecError::ReadFile(path.to_owned(), e))?,
387        );
388        serde_yaml::from_reader(file).map_err(|e| FunctionSpecError::ParseFile(path.to_owned(), e))
389    }
390
391    pub fn author(&self) -> Option<&str> {
392        self.author.as_deref()
393    }
394
395    pub fn description(&self) -> Option<&str> {
396        self.description.as_deref()
397    }
398
399    pub fn matches<V>(&self, visitor: &V) -> bool
400    where
401        V: GroupOrValueVisitor<PlatformConstraint>,
402    {
403        self.constraints
404            .as_ref()
405            .map(|c| c.matches(visitor))
406            .unwrap_or(true)
407    }
408
409    pub fn functions(&self) -> &[FunctionSpec] {
410        &self.functions
411    }
412
413    pub fn functions_matching<'a, 'v, V>(
414        &'a self,
415        visitor: &'v V,
416    ) -> impl Iterator<Item = &'a FunctionSpec> + 'v
417    where
418        'a: 'v,
419        V: GroupOrValueVisitor<PlatformConstraint>,
420    {
421        self.functions.iter().filter(|f| f.matches(visitor))
422    }
423}
424
// Unit tests covering deserialization of constraints and function specs from YAML.
#[cfg(test)]
mod test {
    use fugue_bytes::Endian;

    use crate::common::GroupOrValueVisitor;

    use super::*;

    // A bare `arch: ...` / `platform: ...` pair deserializes to the matching variant.
    #[test]
    fn test_constraint() -> Result<(), Box<dyn std::error::Error>> {
        let input1 = "arch: x86:LE:32";
        let input2 = "platform: posix";

        assert_eq!(
            PlatformConstraint::Arch(Language::new_with("x86", Endian::Little, 32, None, None)),
            serde_yaml::from_str(input1)?
        );

        assert_eq!(
            PlatformConstraint::Platform("posix".to_owned()),
            serde_yaml::from_str(input2)?
        );

        Ok(())
    }

    // A full spec with a nested `where` group, one language-tagged pattern and
    // one untagged pattern.
    #[test]
    fn test_function() -> Result<(), Box<dyn std::error::Error>> {
        let input1 = r#"
name: Perl_croak_no_mem
properties: non-returning
where:
  all:
  - any:
    - arch: x86:LE:32
    - arch: x86:LE:64
  - platform: posix
patterns:
- x86:LE:32:
    patterns:
    - 10 b4
    context:
    - name: TMode
      value: 1
- patterns:
  - 10 b4
  context:
  - name: TMode
    value: 1
"#;

        let fspec = serde_yaml::from_str::<FunctionSpec>(input1)?;

        assert_eq!(fspec.name, "Perl_croak_no_mem");
        assert!(fspec.properties.contains(FunctionProperties::NON_RETURNING));
        // One language group plus the default group.
        assert_eq!(fspec.patterns.len(), 2);

        let constraints = fspec.constraints.unwrap();

        // test group matching via where
        struct ArchWithPlatform {
            arch: Language,
            platform: &'static str,
        }

        impl GroupOrValueVisitor<PlatformConstraint> for ArchWithPlatform {
            fn matches_value(&self, value: &PlatformConstraint) -> bool {
                match value {
                    PlatformConstraint::Platform(platform) => platform == self.platform,
                    PlatformConstraint::Arch(arch) => arch.matches(&self.arch),
                }
            }
        }

        assert!(constraints.matches(&ArchWithPlatform {
            arch: Language::new("x86", Endian::Little),
            platform: "posix",
        }));

        // Same architecture, but the platform constraint is not satisfied.
        assert!(!constraints.matches(&ArchWithPlatform {
            arch: Language::new("x86", Endian::Little),
            platform: "uefi",
        }));

        Ok(())
    }

    // `return-thunk` sets the same bit as `tail`.
    #[test]
    fn test_tail() -> Result<(), Box<dyn std::error::Error>> {
        let input = r#"
name: __x86_return_thunk
properties: return-thunk
where:
  all:
  - arch: x86:LE:64
  - platform: posix
patterns:
- x86:LE:64:
    patterns:
    - F3 0F 1E FA C3
"#;

        let fspec = serde_yaml::from_str::<FunctionSpec>(input)?;

        assert_eq!(fspec.name, "__x86_return_thunk");
        assert!(fspec.properties.contains(FunctionProperties::TAIL));
        assert!(fspec.properties.contains(FunctionProperties::RETURN_THUNK));
        assert_eq!(fspec.patterns.len(), 1);

        Ok(())
    }

    // A spec with an untagged pattern and a `fixup` stub that must parse.
    #[test]
    fn test_fixup() -> Result<(), Box<dyn std::error::Error>> {
        let input1 = r#"
name: get_pc_thunk_bx
where:
  all:
  - arch: x86:LE:32
  - platform: posix
patterns:
  - 8B 1C 24 C3
fixup: |
  EBX = *ESP;
  ESP = ESP + 4;
"#;

        let fspec = serde_yaml::from_str::<FunctionSpec>(input1)?;

        assert_eq!(fspec.name, "get_pc_thunk_bx");
        assert_eq!(fspec.patterns.len(), 1);
        assert!(fspec.fixup.is_some());

        Ok(())
    }
}