Skip to main content

rusty_axml/
parser.rs

1//! Main parser routine
2//!
3//! This module contains the logic to parse the binary XML into a tree structure (`XmlElement`),
4//! representing the actual XML.
5
6use std::collections::HashMap;
7use std::rc::Rc;
8use std::cell::RefCell;
9use std::io::{
10    Error,
11    Cursor,
12    Write,
13};
14use std::fs::File;
15
16use byteorder::{
17    LittleEndian,
18    ReadBytesExt
19};
20
21use quick_xml::Writer;
22use quick_xml::events::{BytesDecl, Event};
23
24use crate::errors::AxmlError;
25use crate::{
26    ResourceMap,
27    StringPool,
28    ResTable
29};
30
31use crate::chunks::{
32    chunk_types::ChunkType,
33    chunk_header::ChunkHeader,
34    data_value_type::DataValueType,
35    res_value::ResValue,
36};
37
38/// Representation of an XML element with optional children
39#[derive(Debug)]
40pub struct XmlElement {
41    /// Type of element (e.g., `activity`, `service`)
42    element_type: String,
43    /// Attributes of the element (e.g., `exported`, `permission`)
44    attributes: HashMap<String, String>,
45    /// Vector of children of the XML element
46    children: Vec<XmlNode>
47}
48
49impl XmlElement {
50    /// Write an `XmlElement` into a writer
51    fn write_element<W: Write>(&self, writer: &mut Writer<W>) -> Result<(), Error> {
52        let mut element = writer.create_element(&self.element_type);
53
54        element = if self.attributes.is_empty() {
55            element
56        } else {
57            element.with_attributes(
58                self.attributes
59                    .iter()
60                    .map(|(k, v)| (k.as_str(), v.as_str()))
61                    .collect::<Vec<(&str, &str)>>(),
62            )
63        };
64
65        if self.children.is_empty() {
66            element.write_empty().unwrap();
67        } else {
68            element
69                .write_inner_content(|writer| -> Result<(), Error> {
70                    for child in self.children.iter() {
71                        child.as_ref().borrow().write_element(writer).unwrap();
72                    }
73
74                    Ok(())
75                })
76                .unwrap();
77        }
78
79        Ok(())
80    }
81
82    /// Get the element's type
83    pub fn element_type(&self) -> &str {
84        &self.element_type
85    }
86
87    /// Get the element's children
88    pub fn children(&self) -> &[XmlNode] {
89        &self.children
90    }
91
92    /// Get the element's attributes
93    pub fn attributes(&self) -> &HashMap<String, String> {
94        &self.attributes
95    }
96
97    /// Get the element's name off of its attributes if it exists
98    pub fn get_name(&self) -> Option<&str> {
99        if let Some(attr) = self.attributes.get("android:name") {
100            return Some(attr);
101        }
102
103        None
104    }
105
106    /// Get an attribute from an `XmlElement` if it exists
107    pub fn get_attr(&self, attr_name: &str) -> Option<&str> {
108        if let Some(attr) = self.attributes.get(attr_name) {
109            return Some(attr);
110        }
111
112        None
113    }
114}
115
116/// XML nodes
117pub type XmlNode = Rc<RefCell<XmlElement>>;
118
119/// Representation of the whole XML document
120#[derive(Debug)]
121pub struct Axml {
122    /// Root of the XML doc
123    root: XmlNode,
124}
125
126impl Axml {
127    /// Write the whole parsed XML to a file
128    pub fn write_to_file(&self, file: &mut File) -> Result<(), AxmlError> {
129        match self.to_string() {
130            Ok(str_xml) => {
131                file.write_all(str_xml.as_bytes())?;
132                Ok(())
133            },
134            Err(err) => { Err(err) }
135        }
136    }
137
138    /// Convert the whole parsed XML into a string
139    pub fn to_string(&self) -> Result<String, AxmlError> {
140        let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4);
141
142        writer
143            .write_event(Event::Decl(BytesDecl::new("1.0", Some("utf-8"), None)))?;
144
145        self.root.borrow().write_element(&mut writer)?;
146
147        let result = std::str::from_utf8(&writer.into_inner())?
148            .to_string();
149
150        Ok(result)
151    }
152
153    /// Get a reference to the root of the AXML document
154    pub fn root(&self) -> &XmlNode {
155        &self.root
156    }
157
158    /// Returns a non-consuming iterator over the AXML doc elements
159    pub fn iter(&self) -> AxmlIterator {
160        AxmlIterator {
161            stack: vec![Rc::clone(&self.root)]
162        }
163    }
164}
165
166/// Iterator over an AXML doc
167///
168/// Iterates through all of the parsed AXML doc elements using depth-first search
169pub struct AxmlIterator {
170    /// Stack of nodes for depth-first traversal
171    stack: Vec<XmlNode>,
172}
173
174impl IntoIterator for Axml {
175    type Item = XmlNode;
176    type IntoIter = AxmlIterator;
177
178    fn into_iter(self) -> Self::IntoIter {
179        AxmlIterator {
180            stack: vec![Rc::clone(&self.root)],
181        }
182    }
183}
184
185impl Iterator for AxmlIterator {
186    type Item = XmlNode;
187
188    fn next(&mut self) -> Option<Self::Item> {
189        match self.stack.pop() {
190            Some(node) => {
191                for child in &node.borrow().children {
192                    self.stack.push(Rc::clone(child));
193                }
194                Some(node)
195            },
196            None => None,
197        }
198    }
199}
200
201/// Parse the start of a namepace
202pub fn parse_start_namespace(axml_buff: &mut Cursor<Vec<u8>>,
203                             strings: &[String],
204                             namespaces: &mut HashMap::<String, String>) -> Result<(), AxmlError> {
205    // Go back 2 bytes, to account from the block type
206    let offset = axml_buff.position();
207    axml_buff.set_position(offset - 2);
208
209    // Parse chunk header
210    let _header = ChunkHeader::from_buff(axml_buff, ChunkType::ResXmlStartNamespaceType)?;
211
212    let _line_number = axml_buff.read_u32::<LittleEndian>()?;
213    let _comment = axml_buff.read_u32::<LittleEndian>()?;
214    let prefix = axml_buff.read_u32::<LittleEndian>()?;
215    let uri = axml_buff.read_u32::<LittleEndian>()?;
216
217    let prefix_str = strings.get(prefix as usize).ok_or(AxmlError::StringPoolError)?;
218    let uri_str = strings.get(uri as usize).ok_or(AxmlError::StringPoolError)?;
219    namespaces.insert(uri_str.to_string(), prefix_str.to_string());
220
221    Ok(())
222}
223
224/// Parse the end of a namepace
225pub fn parse_end_namespace(axml_buff: &mut Cursor<Vec<u8>>,
226                           _strings: &[String]) -> Result<(), AxmlError> {
227    // Go back 2 bytes, to account from the block type
228    let offset = axml_buff.position();
229    axml_buff.set_position(offset - 2);
230
231    // Parse chunk header
232    let _header = ChunkHeader::from_buff(axml_buff, ChunkType::ResXmlEndNamespaceType)?;
233
234    let _line_number = axml_buff.read_u32::<LittleEndian>()?;
235    let _comment = axml_buff.read_u32::<LittleEndian>()?;
236    let _prefix = axml_buff.read_u32::<LittleEndian>()?;
237    let _uri = axml_buff.read_u32::<LittleEndian>()?;
238
239    Ok(())
240}
241
242/// Parse the start of an element
243pub fn parse_start_element(axml_buff: &mut Cursor<Vec<u8>>,
244                           strings: &[String],
245                           namespace_prefixes: &HashMap::<String, String>) -> Result<XmlElement, AxmlError> {
246    // Go back 2 bytes, to account from the block type
247    let offset = axml_buff.position();
248    axml_buff.set_position(offset - 2);
249
250    // Parse chunk header
251    let _header = ChunkHeader::from_buff(axml_buff, ChunkType::ResXmlStartElementType)?;
252
253    let _line_number = axml_buff.read_u32::<LittleEndian>()?;
254    let _comment = axml_buff.read_u32::<LittleEndian>()?;
255    let _namespace = axml_buff.read_u32::<LittleEndian>()?;
256    let name = axml_buff.read_u32::<LittleEndian>()?;
257    let _attribute_size = axml_buff.read_u32::<LittleEndian>()?;
258    let attribute_count = axml_buff.read_u16::<LittleEndian>()?;
259    let _id_index = axml_buff.read_u16::<LittleEndian>()?;
260    let _class_index = axml_buff.read_u16::<LittleEndian>()?;
261    let _style_index = axml_buff.read_u16::<LittleEndian>()?;
262
263    let element_type = strings.get(name as usize).ok_or(AxmlError::StringPoolError)?.to_string();
264
265    let mut decoded_attrs = HashMap::<String, String>::new();
266    for _ in 0..attribute_count {
267        let attr_namespace = axml_buff.read_u32::<LittleEndian>()?;
268        let attr_name = axml_buff.read_u32::<LittleEndian>()?;
269        let attr_raw_val = axml_buff.read_u32::<LittleEndian>()?;
270        let data_value_type = ResValue::from_buff(axml_buff)?;
271
272        let mut decoded_attr_key = String::new();
273        let mut decoded_attr_val = String::new();
274
275        if attr_namespace != 0xffffffff {
276            let namespace = strings.get(attr_namespace as usize).ok_or(AxmlError::StringPoolError)?;
277            let ns_prefix = namespace_prefixes.get(namespace).ok_or(AxmlError::NamespaceError)?;
278            decoded_attr_key.push_str(ns_prefix);
279            decoded_attr_key.push(':');
280        } else {
281            // TODO
282        }
283
284        decoded_attr_key.push_str(strings.get(attr_name as usize).ok_or(AxmlError::StringPoolError)?);
285
286        if attr_raw_val != 0xffffffff {
287            decoded_attr_val.push_str(&strings.get(attr_raw_val as usize).ok_or(AxmlError::StringPoolError)?.to_string());
288        } else {
289            match data_value_type.data_type {
290                DataValueType::TypeNull => {
291                    decoded_attr_val.push_str("(null)");
292                },
293                DataValueType::TypeReference => {
294                    decoded_attr_val.push_str("0x");
295                    decoded_attr_val.push_str(&format!("{:x}", &data_value_type.data).to_string());
296                },
297                DataValueType::TypeAttribute => {
298                    decoded_attr_val.push_str("0x");
299                    decoded_attr_val.push_str(&format!("{:x}", &data_value_type.data).to_string());
300                },
301                DataValueType::TypeString => println!("TODO: DataValueType::TypeString"),
302                DataValueType::TypeFloat
303                    | DataValueType::TypeDimension
304                    | DataValueType::TypeFraction
305                    | DataValueType::TypeDynamicReference
306                    | DataValueType::TypeDynamicAttribute
307                    | DataValueType::TypeIntDec
308                    | DataValueType::TypeIntHex => {
309                        // TODO
310                        decoded_attr_val.push_str(&format!("(type 0x{:x}) ", data_value_type.data_type).to_string());
311                        decoded_attr_val.push_str(&format!("0x{:x}", &data_value_type.data).to_string());
312                },
313                DataValueType::TypeIntBoolean => {
314                    if data_value_type.data == 0 {
315                        decoded_attr_val.push_str("false");
316                    } else {
317                        decoded_attr_val.push_str("true");
318                    }
319                },
320                DataValueType::TypeIntColorArgb8 => println!("TODO: DataValueType::TypeIntColorArgb8"),
321                DataValueType::TypeIntColorRgb8 => println!("TODO: DataValueType::TypeIntColorRgb8"),
322                DataValueType::TypeIntColorArgb4 => println!("TODO: DataValueType::TypeIntColorArgb4"),
323                DataValueType::TypeIntColorRgb4 => println!("TODO: DataValueType::TypeIntColorRgb4"),
324            }
325        }
326        decoded_attrs.insert(
327                decoded_attr_key.to_string(),
328                decoded_attr_val.to_string()
329        );
330    }
331
332    Ok(XmlElement {
333        element_type,
334        attributes: decoded_attrs,
335        children: Vec::new()
336    })
337}
338
339/// Parse the end of an element
340pub fn parse_end_element(axml_buff: &mut Cursor<Vec<u8>>,
341                         strings: &[String]) -> Result<String, AxmlError> {
342    // Go back 2 bytes, to account from the block type
343    let offset = axml_buff.position();
344    axml_buff.set_position(offset - 2);
345
346    // Parse chunk header
347    let _header = ChunkHeader::from_buff(axml_buff, ChunkType::ResXmlEndElementType)?;
348
349    let _line_number = axml_buff.read_u32::<LittleEndian>()?;
350    let _comment = axml_buff.read_u32::<LittleEndian>()?;
351    let _namespace = axml_buff.read_u32::<LittleEndian>()?;
352    let name = axml_buff.read_u32::<LittleEndian>()?;
353
354    let name = strings.get(name as usize).ok_or(AxmlError::StringPoolError)?;
355    Ok(name.to_string())
356}
357
358/// Parse a whole XML document
359pub fn parse_xml(mut axml_cursor: Cursor<Vec<u8>>) -> Result<Axml, AxmlError> {
360    let mut global_strings = Vec::new();
361    let mut namespace_prefixes = HashMap::<String, String>::new();
362
363    // TODO this will not work for non manifest files
364    let root = Rc::new(RefCell::new(XmlElement {
365        element_type: "manifest".to_string(),
366        attributes: HashMap::new(),
367        children: Vec::new()
368    }));
369    let mut stack = vec![Rc::clone(&root)];
370
371    while let Ok(block_type) = ChunkType::parse_block_type(&mut axml_cursor) {
372        match block_type {
373            ChunkType::ResNullType => continue,
374            ChunkType::ResStringPoolType => {
375                let _ = StringPool::from_buff(&mut axml_cursor, &mut global_strings)?;
376            },
377            ChunkType::ResTableType => {
378                let _ = ResTable::parse(&mut axml_cursor)?;
379            },
380            ChunkType::ResXmlType => {
381                axml_cursor.set_position(axml_cursor.position() - 2);
382                let _ = ChunkHeader::from_buff(&mut axml_cursor, ChunkType::ResXmlType)?;
383            },
384            ChunkType::ResXmlStartNamespaceType => {
385                parse_start_namespace(&mut axml_cursor, &global_strings, &mut namespace_prefixes)?;
386            },
387            ChunkType::ResXmlEndNamespaceType => {
388                parse_end_namespace(&mut axml_cursor, &global_strings)?;
389            },
390            ChunkType::ResXmlStartElementType => {
391                let element = parse_start_element(&mut axml_cursor, &global_strings, &namespace_prefixes)?;
392
393                if element.element_type == "manifest" {
394                    stack.last().unwrap().borrow_mut().attributes = element.attributes.clone();
395                } else {
396                    let new_element = Rc::new(RefCell::new(element));
397                    stack.last().unwrap().borrow_mut().children.push(Rc::clone(&new_element));
398                    stack.push(new_element);
399                }
400
401            },
402            ChunkType::ResXmlEndElementType => {
403                parse_end_element(&mut axml_cursor, &global_strings)?;
404                stack.pop();
405            },
406
407            ChunkType::ResXmlResourceMapType => {
408                let _ = ResourceMap::from_buff(&mut axml_cursor)?;
409            },
410
411            _ => { },
412        }
413    }
414
415    Ok(Axml { root })
416}