fugue_ir/
language.rs

1use crate::deserialise::parse::XmlExt;
2use crate::deserialise::Error as DeserialiseError;
3
4use crate::endian::Endian;
5use crate::error::Error;
6
7use crate::compiler::Specification as CSpec;
8use crate::processor::Specification as PSpec;
9use crate::Translator;
10
11use ahash::AHashMap as Map;
12use fugue_arch::{ArchitectureDef, ArchDefParseError};
13use itertools::Itertools;
14use walkdir::WalkDir;
15
16use std::fs::File;
17use std::io::Read;
18use std::path::{Path, PathBuf};
19use log;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct Language {
23    id: String,
24    architecture: ArchitectureDef,
25    version: String,
26    sla_file: PathBuf,
27    processor_spec: PSpec,
28    compiler_specs: Map<String, CSpec>,
29}
30
31impl Language {
32    pub fn id(&self) -> &str {
33        &self.id
34    }
35
36    pub fn architecture(&self) -> &ArchitectureDef {
37        &self.architecture
38    }
39
40    pub fn version(&self) -> &str {
41        &self.version
42    }
43
44    pub fn sla_file(&self) -> &Path {
45        &self.sla_file
46    }
47
48    pub fn processor_spec(&self) -> &PSpec {
49        &self.processor_spec
50    }
51
52    pub fn compiler_specs(&self) -> &Map<String, CSpec> {
53        &self.compiler_specs
54    }
55
56    pub fn from_xml<P: AsRef<Path>>(root: P, input: xml::Node) -> Result<Self, DeserialiseError> {
57        Self::from_xml_with(root, input, false)
58    }
59
60    /// Build Language object from each <language> tage specified in .ldef file
61    /// # Parameters
62    /// root: the search directory for finding related files specified in the .ldef file
63    /// input: the xml::Node object which have name language for language definiation
64    /// ignore_errors: if ignore parsing errors
65    pub fn from_xml_with<P: AsRef<Path>>(
66        root: P,
67        input: xml::Node,
68        ignore_errors: bool,
69    ) -> Result<Self, DeserialiseError> {
70        // Check the correctness of the tag name
71        if input.tag_name().name() != "language" {
72            return Err(DeserialiseError::TagUnexpected(
73                input.tag_name().name().to_owned(),
74            ));
75        }
76
77        // Read path to the processor spec (.pspec) file
78        let mut path = root.as_ref().to_path_buf();
79        let pspec_path = input.attribute_string("processorspec")?;
80        path.push(pspec_path);
81
82        // Build processor spec from .pspec file
83        let processor_spec =
84            PSpec::from_file(&path).map_err(|e| DeserialiseError::DeserialiseDepends {
85                path,
86                error: Box::new(e),
87            })?;
88
89        // Read path to the compiler spec (.cspec) file
90        // Each language can have several .cspec file
91        let compiler_specs_it = input
92            .children()
93            .filter(|e| e.is_element() && e.tag_name().name() == "compiler")
94            .map(|compiler| {
95                let id = compiler.attribute_string("id")?;
96                let name = compiler.attribute_string("name")?;
97
98                let mut path = root.as_ref().to_path_buf();
99                let cspec_path = compiler.attribute_string("spec")?;
100
101                log::debug!("loading compiler specification `{}`", cspec_path);
102
103                path.push(cspec_path);
104
105                Ok((id, name, path))
106            });
107
108        // Build compiler specs from .cspec file
109        let compiler_specs = if ignore_errors {
110            compiler_specs_it
111                .filter_map_ok(|(id, name, path)| {
112                    log::debug!("id: {}, name: {}, path: {:?}", id, name, path);
113                    CSpec::named_from_file(name, &path)
114                        .ok()
115                        .map(|cspec| (id, cspec))
116                })
117                .collect::<Result<Map<_, _>, DeserialiseError>>()
118        } else {
119            compiler_specs_it
120                .map(|res| {
121                    res.and_then(|(id, name, path)| {
122                        CSpec::named_from_file(name, &path)
123                            .map(|cspec| (id, cspec))
124                            .map_err(|e| DeserialiseError::DeserialiseDepends {
125                                path,
126                                error: Box::new(e),
127                            })
128                    })
129                })
130                .collect::<Result<Map<_, _>, DeserialiseError>>()
131        }?;
132
133        // Obtain architecture information, enaian, word size, variant etc
134        let architecture = ArchitectureDef::new(
135            input.attribute_processor("processor")?,
136            input.attribute_endian("endian")?,
137            input.attribute_int("size")?,
138            input.attribute_variant("variant")?,
139        );
140
141        log::debug!(
142            "loaded {} compiler conventions for {}",
143            compiler_specs.len(),
144            architecture
145        );
146
147        // Read path to the .sla file
148        let mut path = root.as_ref().to_path_buf();
149        let slafile_path = input.attribute_string("slafile")?;
150        path.push(slafile_path);
151
152        Ok(Self {
153            id: input.attribute_string("id")?,
154            architecture,
155            version: input.attribute_string("version")?,
156            sla_file: path,
157            processor_spec,
158            compiler_specs,
159        })
160    }
161}
162
163#[derive(Debug, Clone)]
164#[repr(transparent)]
165pub struct LanguageBuilder<'a> {
166    language: &'a Language,
167}
168
169impl<'a> LanguageBuilder<'a> {
170    pub fn language(&self) -> &'a Language {
171        self.language
172    }
173
174    #[inline(always)]
175    pub fn apply_context(&self, translator: &mut Translator) {
176        for (name, val) in self.language.processor_spec.context_set() {
177            translator.set_variable_default(name.as_ref(), val);
178        }
179    }
180
181    #[inline(always)]
182    pub fn build_with(&self, apply_context: bool) -> Result<Translator, Error> {
183        let mut translator = Translator::from_file(
184            self.language.processor_spec.program_counter(),
185            &self.language.architecture,
186            &self.language.compiler_specs,
187            &self.language.sla_file,
188        )?;
189
190        if apply_context {
191            self.apply_context(&mut translator)
192        }
193
194        Ok(translator)
195    }
196
197    pub fn build(&self) -> Result<Translator, Error> {
198        self.build_with(true)
199    }
200}
201
202#[derive(Debug, Default, Clone)]
203#[repr(transparent)]
204pub struct LanguageDB {
205    db: Map<ArchitectureDef, Language>,
206}
207
208impl LanguageDB {
209    pub fn lookup_default<'a, P: Into<String>>(
210        &'a self,
211        processor: P,
212        endian: Endian,
213        bits: usize,
214    ) -> Option<LanguageBuilder<'a>> {
215        self.db
216            .get(&ArchitectureDef::new(processor, endian, bits, "default"))
217            .map(|language| LanguageBuilder { language })
218    }
219
220    pub fn lookup_str<'a, S: AsRef<str>>(
221        &'a self,
222        definition: S,
223    ) -> Result<Option<LanguageBuilder<'a>>, ArchDefParseError> {
224        let def = definition.as_ref().parse::<ArchitectureDef>()?;
225        Ok(self.db
226            .get(&def)
227            .map(|language| LanguageBuilder { language }))
228    }
229
230    pub fn lookup<'a, P: Into<String>, V: Into<String>>(
231        &'a self,
232        processor: P,
233        endian: Endian,
234        bits: usize,
235        variant: V,
236    ) -> Option<LanguageBuilder<'a>> {
237        self.db
238            .get(&ArchitectureDef::new(processor, endian, bits, variant))
239            .map(|language| LanguageBuilder { language })
240    }
241
242    pub fn definitions<'a>(&'a self) -> impl Iterator<Item = &'a ArchitectureDef> {
243        self.db.keys()
244    }
245
246    pub fn iter<'a>(&'a self) -> impl Iterator<Item = LanguageBuilder<'a>> {
247        self.db
248            .iter()
249            .map(move |(_, language)| LanguageBuilder { language })
250    }
251
252    fn into_iter(self) -> impl Iterator<Item = (ArchitectureDef, Language)> {
253        self.db.into_iter()
254    }
255
256    pub fn len(&self) -> usize {
257        self.db.len()
258    }
259
260    pub fn from_xml<P: AsRef<Path>>(root: P, input: xml::Node) -> Result<Self, DeserialiseError> {
261        Self::from_xml_with(root, input, false)
262    }
263
264    /// Build language DB from XML document
265    /// # Parameters
266    /// root: the search directory for finding related files specified in the .ldef file
267    /// input: xml::Node object parsed from .ldef file using xml::Document::parse()
268    /// ignore_errors: If we ignore parsing error
269    pub fn from_xml_with<P: AsRef<Path>>(
270        root: P,
271        input: xml::Node,
272        ignore_errors: bool,
273    ) -> Result<Self, DeserialiseError> {
274        // Example structure of .ldef file
275        // <language_definitions>
276        //   <language processor="MIPS"
277        //   endian="big"
278        //   size="32"
279        //   variant="default"
280        //   version="1.5"
281        //   slafile="mips32be.sla"
282        //   processorspec="mips32.pspec"
283        //   manualindexfile="../manuals/mipsM16.idx"
284        //   id="MIPS:BE:32:default">
285        //  <description>MIPS32 32-bit addresses, big endian, with mips16e</description>
286        //  <compiler name="default" spec="mips32.cspec" id="default"/>
287        //  <compiler name="Visual Studio" spec="mips32.cspec" id="windows"/>
288        //  <external_name tool="gnu" name="mips:4000"/>
289        //  <external_name tool="IDA-PRO" name="mipsb"/>
290        //  <external_name tool="DWARF.register.mapping.file" name="mips.dwarf"/>
291        //  </language>
292        // </language_definitions>
293        if input.tag_name().name() != "language_definitions" {
294            return Err(DeserialiseError::TagUnexpected(
295                input.tag_name().name().to_owned(),
296            ));
297        }
298
299        let root = root.as_ref().to_path_buf();
300
301        // Go through each language tag and parse them
302        let defs = input
303            .children()
304            .filter(xml::Node::is_element)
305            .filter(|t| t.tag_name().name() == "language")
306            .map(|t| {
307                let ldef = Language::from_xml_with(&root, t, ignore_errors)?;
308                Ok((ldef.architecture.clone(), ldef))
309            });
310
311        Ok(Self {
312            db: if ignore_errors {
313                defs.filter_map(|t| t.ok()).collect()
314            } else {
315                defs.collect::<Result<_, DeserialiseError>>()?
316            },
317        })
318    }
319
320    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
321        Self::from_file_with(path, false)
322    }
323
324    /// Build fugue language DB from XML file
325    /// # Parameters
326    /// path: the path to the .ldef file
327    /// ignore_errors: Ignore parsing error
328    pub fn from_file_with<P: AsRef<Path>>(path: P, ignore_errors: bool) -> Result<Self, Error> {
329        // Open file
330        let path = path.as_ref();
331        let mut file = File::open(path).map_err(|error| Error::ParseFile {
332            path: path.to_owned(),
333            error,
334        })?;
335
336        // Read to string
337        let mut input = String::new();
338        file.read_to_string(&mut input)
339            .map_err(|error| Error::ParseFile {
340                path: path.to_owned(),
341                error,
342            })?;
343
344        // Obtain the folder that the spec is in
345        let root = path
346            .parent()
347            .ok_or_else(|| {
348                DeserialiseError::Invariant("cannot obtain parent directory of language defintions")
349            })
350            .map_err(|error| Error::DeserialiseFile {
351                path: path.to_owned(),
352                error,
353            })?;
354
355        // Parse the string
356        Self::from_str_with(root, &input, ignore_errors).map_err(|error| Error::DeserialiseFile {
357            path: path.to_owned(),
358            error,
359        })
360    }
361
362    pub fn from_str<P: AsRef<Path>, S: AsRef<str>>(
363        root: P,
364        input: S,
365    ) -> Result<Self, DeserialiseError> {
366        Self::from_str_with(root, input, false)
367    }
368
369    /// Build the language DB from the XML string
370    /// # Parameters
371    /// root: the search directory for finding related files specified in the .ldef file
372    /// input: .ldef file read as string
373    /// ignore_errors: If we ignore parsing errors
374    pub fn from_str_with<P: AsRef<Path>, S: AsRef<str>>(
375        root: P,
376        input: S,
377        ignore_errors: bool,
378    ) -> Result<Self, DeserialiseError> {
379        let document = xml::Document::parse(input.as_ref()).map_err(DeserialiseError::Xml)?;
380
381        Self::from_xml_with(root, document.root_element(), ignore_errors)
382    }
383
384    pub fn from_directory<P: AsRef<Path>>(directory: P) -> Result<Self, Error> {
385        Self::from_directory_with(directory, false)
386    }
387
388    pub fn from_directory_with<P: AsRef<Path>>(
389        directory: P,
390        ignore_errors: bool,
391    ) -> Result<Self, Error> {
392        WalkDir::new(directory.as_ref())
393            .into_iter()
394            .filter_map(|e| e.ok())
395            .filter(|e| {
396                e.file_type().is_file()
397                    && e.path().extension().map(|e| e == "ldefs").unwrap_or(false)
398            })
399            .try_fold(Self::default(), |mut acc, ldef| {
400                log::debug!("loading language definition from `{:?}`", ldef);
401                match Self::from_file_with(ldef.path(), ignore_errors) {
402                    Ok(db) => {
403                        acc.db.extend(db.into_iter());
404                        Ok(acc)
405                    }
406                    Err(_) if ignore_errors => Ok(acc),
407                    Err(e) => Err(e),
408                }
409            })
410    }
411}