1use std::borrow::{Borrow, Cow};
2use std::collections::btree_map::Entry;
3use std::collections::{BTreeMap, BTreeSet};
4use std::fs::File;
5use std::io::{self, BufReader, Read};
6use std::path::{Path, PathBuf};
7
8use bitflags::bitflags;
9
10use fugue_ir::disassembly::{IRBuilderArena, PCodeBlock};
11use fugue_ir::Translator;
12use fugue_sleigh::{CodeBlock, IRBuilder, IRBuilderError};
13
14use serde::de::value::StringDeserializer;
15use serde::ser::SerializeSeq;
16use serde::{Deserialize, Deserializer, Serialize, Serializer};
17
18use thiserror::Error;
19
20use crate::common::{
21 AttrOptWithVal, AttrWithVal, GroupOrValue, GroupOrValueVisitor, Language, OneOrMany,
22};
23use crate::pattern::PatternsWithContext;
24
25bitflags! {
26 #[derive(
27 Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize
28 )]
29 #[repr(transparent)]
30 pub struct FunctionProperties: u8 {
31 const NON_RETURNING = 0b0000_0001;
32
33 const TAIL = 0b0000_0010;
34 const RETURN_THUNK = Self::TAIL.bits();
35 }
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)]
39#[serde(rename_all = "kebab-case")]
40enum FunctionProperty {
41 NonReturning,
42 ReturnThunk,
43 Tail,
44}
45
46impl From<OneOrMany<FunctionProperty>> for FunctionProperties {
47 fn from(value: OneOrMany<FunctionProperty>) -> Self {
48 Vec::from(value).into()
49 }
50}
51
52impl From<Vec<FunctionProperty>> for FunctionProperties {
53 fn from(values: Vec<FunctionProperty>) -> Self {
54 let mut props = FunctionProperties::empty();
55 for value in values {
56 match value {
57 FunctionProperty::NonReturning => {
58 props.insert(FunctionProperties::NON_RETURNING);
59 }
60 FunctionProperty::Tail | FunctionProperty::ReturnThunk => {
61 props.insert(FunctionProperties::TAIL);
62 }
63 }
64 }
65 props
66 }
67}
68
69impl From<FunctionProperties> for Vec<FunctionProperty> {
70 fn from(value: FunctionProperties) -> Self {
71 let mut props = Vec::new();
72 for prop in value.iter() {
73 match prop {
74 FunctionProperties::NON_RETURNING => {
75 props.push(FunctionProperty::NonReturning);
76 }
77 FunctionProperties::TAIL => {
78 props.push(FunctionProperty::Tail);
79 }
80 _ => (),
81 }
82 }
83 props
84 }
85}
86
87impl From<FunctionProperties> for OneOrMany<FunctionProperty> {
88 fn from(value: FunctionProperties) -> Self {
89 Vec::from(value).into()
90 }
91}
92
93#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
94pub enum PlatformConstraint {
95 Arch(Language),
96 Platform(String),
97}
98
99impl<'de> Deserialize<'de> for PlatformConstraint {
100 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
101 where
102 D: Deserializer<'de>,
103 {
104 let av = AttrWithVal::<String, String>::deserialize(deserializer)?;
105 match av.attr.as_ref() {
106 "arch" => {
107 Ok(Self::Arch(Language::deserialize(StringDeserializer::new(av.val))?))
108 }
109 "platform" => {
110 Ok(Self::Platform(av.val))
111 }
112 _ => {
113 Err(<D::Error as serde::de::Error>::custom(
114 "invalid arch/platform constraint (should be of the form arch: ... or platform: ...)",
115 ))
116 }
117 }
118 }
119}
120
121impl Serialize for PlatformConstraint {
122 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
123 where
124 S: Serializer,
125 {
126 match self {
127 Self::Arch(arch) => AttrWithVal {
128 attr: "arch",
129 val: arch,
130 }
131 .serialize(serializer),
132 Self::Platform(platform) => AttrWithVal {
133 attr: "platform",
134 val: platform,
135 }
136 .serialize(serializer),
137 }
138 }
139}
140
141#[derive(Clone, Default)]
142pub struct FunctionPatterns {
143 languages: BTreeMap<Language, Vec<PatternsWithContext>>,
144 default: Vec<PatternsWithContext>,
145}
146
147impl<'de> Deserialize<'de> for FunctionPatterns {
148 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
149 where
150 D: Deserializer<'de>,
151 {
152 let d = Vec::<AttrOptWithVal<Language, PatternsWithContext>>::deserialize(deserializer)?;
153 let mut languages = BTreeMap::new();
154 let mut default = Vec::new();
155
156 for d in d.into_iter() {
157 let (k, v) = match d {
158 AttrOptWithVal::Val(v) => {
159 default.push(v);
160 continue;
161 }
162 AttrOptWithVal::AttrWithVal(av) => (av.attr, av.val),
163 };
164
165 match languages.entry(k) {
166 Entry::Vacant(entry) => {
167 entry.insert(vec![v]);
168 }
169 Entry::Occupied(mut entry) => {
170 entry.get_mut().push(v);
171 }
172 }
173 }
174
175 Ok(Self { languages, default })
176 }
177}
178
179impl Serialize for FunctionPatterns {
180 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
181 where
182 S: Serializer,
183 {
184 let mut seq = serializer.serialize_seq(None)?;
185 for (k, v) in self.languages.iter() {
186 seq.serialize_element(&AttrOptWithVal::AttrWithVal(AttrWithVal {
187 attr: k,
188 val: v,
189 }))?;
190 }
191 for v in self.default.iter() {
192 seq.serialize_element(&AttrOptWithVal::<Language, _>::Val(v))?;
193 }
194 seq.end()
195 }
196}
197
198impl FunctionPatterns {
199 pub fn matches(&self, language: impl Borrow<Language>, bytes: impl AsRef<[u8]>) -> bool {
200 let bytes = bytes.as_ref();
201 self.languages
202 .get(language.borrow())
203 .into_iter()
204 .flatten()
205 .chain(self.default.iter())
206 .any(|pat| pat.matches_exact(bytes))
207 }
208
209 pub fn len(&self) -> usize {
210 self.languages.len() + !self.default.is_empty() as usize
211 }
212}
213
/// Set of alternative names for a function specification.
pub type FunctionSpecAliases = BTreeSet<String>;
215
216#[derive(Clone)]
217pub struct FunctionSpec {
218 name: String,
219 aliases: FunctionSpecAliases,
220 constraints: Option<GroupOrValue<PlatformConstraint>>,
221 properties: FunctionProperties,
222 patterns: FunctionPatterns,
223 fixup: Option<FunctionStub>,
224}
225
226#[derive(Clone)]
227pub struct FunctionStub {
228 source: String,
229 ast: CodeBlock,
230}
231
232impl<'de> Deserialize<'de> for FunctionStub {
233 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
234 where
235 D: Deserializer<'de>,
236 {
237 let source = String::deserialize(deserializer)?;
238 let ast =
239 CodeBlock::parse(&source).map_err(|e| <D::Error as serde::de::Error>::custom(e))?;
240
241 Ok(Self { source, ast })
242 }
243}
244
245impl Serialize for FunctionStub {
246 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
247 where
248 S: Serializer,
249 {
250 self.source.serialize(serializer)
251 }
252}
253
254impl FunctionStub {
255 pub fn ast(&self) -> &CodeBlock {
256 &self.ast
257 }
258
259 pub fn source(&self) -> &str {
260 &self.source
261 }
262
263 pub fn to_pcode<'ir>(
264 &self,
265 translator: &Translator,
266 irb: &'ir IRBuilderArena,
267 ) -> Result<PCodeBlock<'ir>, IRBuilderError> {
268 let mut builder = IRBuilder::new(translator);
269 builder.translate_parsed(irb, self.ast())
270 }
271}
272
273#[derive(Deserialize, Serialize)]
274struct FunctionSpecRepr<'a> {
275 name: Cow<'a, str>,
276 #[serde(default)]
277 aliases: Cow<'a, FunctionSpecAliases>,
278 #[serde(default, rename = "where")]
279 constraints: Cow<'a, Option<GroupOrValue<PlatformConstraint>>>,
280 #[serde(default)]
281 properties: Cow<'a, OneOrMany<FunctionProperty>>,
282 #[serde(default, alias = "pattern")]
283 patterns: Cow<'a, FunctionPatterns>,
284 #[serde(default, alias = "stub")]
285 fixup: Cow<'a, Option<FunctionStub>>,
286}
287
288impl<'de> Deserialize<'de> for FunctionSpec {
289 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
290 where
291 D: Deserializer<'de>,
292 {
293 let d = FunctionSpecRepr::deserialize(deserializer)?;
294
295 Ok(Self {
296 name: d.name.into_owned(),
297 aliases: d.aliases.into_owned(),
298 constraints: d.constraints.into_owned(),
299 properties: d.properties.into_owned().into(),
300 patterns: d.patterns.into_owned(),
301 fixup: d.fixup.into_owned(),
302 })
303 }
304}
305
306impl Serialize for FunctionSpec {
307 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
308 where
309 S: Serializer,
310 {
311 let s = FunctionSpecRepr {
312 name: Cow::Borrowed(&self.name),
313 aliases: Cow::Borrowed(&self.aliases),
314 constraints: Cow::Borrowed(&self.constraints),
315 properties: Cow::Owned(self.properties.into()),
316 patterns: Cow::Borrowed(&self.patterns),
317 fixup: Cow::Borrowed(&self.fixup),
318 };
319
320 s.serialize(serializer)
321 }
322}
323
324impl FunctionSpec {
325 pub fn name(&self) -> &str {
326 &self.name
327 }
328
329 pub fn aliases(&self) -> &FunctionSpecAliases {
330 &self.aliases
331 }
332
333 pub fn properties(&self) -> FunctionProperties {
334 self.properties
335 }
336
337 pub fn patterns(&self) -> &FunctionPatterns {
338 &self.patterns
339 }
340
341 pub fn fixup(&self) -> Option<&FunctionStub> {
342 self.fixup.as_ref()
343 }
344
345 pub fn matches<V>(&self, visitor: &V) -> bool
346 where
347 V: GroupOrValueVisitor<PlatformConstraint>,
348 {
349 self.constraints
350 .as_ref()
351 .map(|c| c.matches(visitor))
352 .unwrap_or(true)
353 }
354}
355
356#[derive(Clone, Deserialize, Serialize)]
357pub struct FunctionSpecs {
358 author: Option<String>,
359 description: Option<String>,
360 constraints: Option<GroupOrValue<PlatformConstraint>>,
361 functions: Vec<FunctionSpec>,
362}
363
364#[derive(Debug, Error)]
365pub enum FunctionSpecError {
366 #[error("cannot parse function specifications: {0}")]
367 Parse(serde_yaml::Error),
368 #[error("cannot function specifications from `{0}`: {1}")]
369 ParseFile(PathBuf, serde_yaml::Error),
370 #[error("cannot parse function specifications from `{0}`: {1}")]
371 ReadFile(PathBuf, io::Error),
372}
373
374impl FunctionSpecs {
375 pub fn from_str(input: impl AsRef<str>) -> Result<Self, FunctionSpecError> {
376 serde_yaml::from_str(input.as_ref()).map_err(FunctionSpecError::Parse)
377 }
378
379 pub fn from_reader(reader: impl Read) -> Result<Self, FunctionSpecError> {
380 serde_yaml::from_reader(reader).map_err(FunctionSpecError::Parse)
381 }
382
383 pub fn from_file(path: impl AsRef<Path>) -> Result<Self, FunctionSpecError> {
384 let path = path.as_ref();
385 let file = BufReader::new(
386 File::open(path).map_err(|e| FunctionSpecError::ReadFile(path.to_owned(), e))?,
387 );
388 serde_yaml::from_reader(file).map_err(|e| FunctionSpecError::ParseFile(path.to_owned(), e))
389 }
390
391 pub fn author(&self) -> Option<&str> {
392 self.author.as_deref()
393 }
394
395 pub fn description(&self) -> Option<&str> {
396 self.description.as_deref()
397 }
398
399 pub fn matches<V>(&self, visitor: &V) -> bool
400 where
401 V: GroupOrValueVisitor<PlatformConstraint>,
402 {
403 self.constraints
404 .as_ref()
405 .map(|c| c.matches(visitor))
406 .unwrap_or(true)
407 }
408
409 pub fn functions(&self) -> &[FunctionSpec] {
410 &self.functions
411 }
412
413 pub fn functions_matching<'a, 'v, V>(
414 &'a self,
415 visitor: &'v V,
416 ) -> impl Iterator<Item = &'a FunctionSpec> + 'v
417 where
418 'a: 'v,
419 V: GroupOrValueVisitor<PlatformConstraint>,
420 {
421 self.functions.iter().filter(|f| f.matches(visitor))
422 }
423}
424
#[cfg(test)]
mod test {
    use fugue_bytes::Endian;

    use crate::common::GroupOrValueVisitor;

    use super::*;

    /// A bare `arch: ...` / `platform: ...` pair deserialises into the
    /// corresponding `PlatformConstraint` variant.
    #[test]
    fn test_constraint() -> Result<(), Box<dyn std::error::Error>> {
        let input1 = "arch: x86:LE:32";
        let input2 = "platform: posix";

        assert_eq!(
            PlatformConstraint::Arch(Language::new_with("x86", Endian::Little, 32, None, None)),
            serde_yaml::from_str(input1)?
        );

        assert_eq!(
            PlatformConstraint::Platform("posix".to_owned()),
            serde_yaml::from_str(input2)?
        );

        Ok(())
    }

    /// A full spec with nested `where` groups and both a language-tagged and
    /// an untagged pattern group.
    #[test]
    fn test_function() -> Result<(), Box<dyn std::error::Error>> {
        // YAML nesting is whitespace-significant: `platform` must sit under
        // `all` (not under `any`) for the constraint assertions below to hold.
        let input1 = r#"
name: Perl_croak_no_mem
properties: non-returning
where:
  all:
    - any:
      - arch: x86:LE:32
      - arch: x86:LE:64
    - platform: posix
patterns:
- x86:LE:32:
    patterns:
    - 10 b4
    context:
    - name: TMode
      value: 1
- patterns:
  - 10 b4
  context:
  - name: TMode
    value: 1
"#;

        let fspec = serde_yaml::from_str::<FunctionSpec>(input1)?;

        assert_eq!(fspec.name, "Perl_croak_no_mem");
        assert!(fspec.properties.contains(FunctionProperties::NON_RETURNING));
        assert_eq!(fspec.patterns.len(), 2);

        let constraints = fspec.constraints.unwrap();

        // Visitor standing in for a concrete target: one arch plus one platform.
        struct ArchWithPlatform {
            arch: Language,
            platform: &'static str,
        }

        impl GroupOrValueVisitor<PlatformConstraint> for ArchWithPlatform {
            fn matches_value(&self, value: &PlatformConstraint) -> bool {
                match value {
                    PlatformConstraint::Platform(platform) => platform == self.platform,
                    PlatformConstraint::Arch(arch) => arch.matches(&self.arch),
                }
            }
        }

        assert!(constraints.matches(&ArchWithPlatform {
            arch: Language::new("x86", Endian::Little),
            platform: "posix",
        }));

        assert!(!constraints.matches(&ArchWithPlatform {
            arch: Language::new("x86", Endian::Little),
            platform: "uefi",
        }));

        Ok(())
    }

    /// `return-thunk` sets the bit shared by `TAIL` and `RETURN_THUNK`.
    #[test]
    fn test_tail() -> Result<(), Box<dyn std::error::Error>> {
        let input = r#"
name: __x86_return_thunk
properties: return-thunk
where:
  all:
    - arch: x86:LE:64
    - platform: posix
patterns:
- x86:LE:64:
    patterns:
    - F3 0F 1E FA C3
"#;

        let fspec = serde_yaml::from_str::<FunctionSpec>(input)?;

        assert_eq!(fspec.name, "__x86_return_thunk");
        assert!(fspec.properties.contains(FunctionProperties::TAIL));
        assert!(fspec.properties.contains(FunctionProperties::RETURN_THUNK));
        assert_eq!(fspec.patterns.len(), 1);

        Ok(())
    }

    /// A `fixup` block scalar is parsed into a stub while the spec is loaded.
    #[test]
    fn test_fixup() -> Result<(), Box<dyn std::error::Error>> {
        let input1 = r#"
name: get_pc_thunk_bx
where:
  all:
    - arch: x86:LE:32
    - platform: posix
patterns:
  - 8B 1C 24 C3
fixup: |
  EBX = *ESP;
  ESP = ESP + 4;
"#;

        let fspec = serde_yaml::from_str::<FunctionSpec>(input1)?;

        assert_eq!(fspec.name, "get_pc_thunk_bx");
        assert_eq!(fspec.patterns.len(), 1);
        assert!(fspec.fixup.is_some());

        Ok(())
    }
}