Documentation
//! Parse vbaProject.bin file
//!
//! Ported from the Python implementation at:
//! https://github.com/unixfreak0037/officeparser/blob/master/officeparser.py

use std::io::Read;
use std::path::PathBuf;
use std::collections::HashMap;

use byteorder::{LittleEndian, ReadBytesExt};
use log::LogLevel;

use errors::*;
use cfb::Cfb;

/// A struct for managing VBA reading
///
/// Instances are built by `VbaProject::new` or `VbaProject::from_cfb`, which read the
/// references and decompress every module up front.
// NOTE(review): `cfb` is stored but not read by any method visible in this file, which is
// presumably why `dead_code` is allowed here - confirm before removing the attribute.
#[allow(dead_code)]
#[derive(Clone)]
pub struct VbaProject {
    /// compound file the project was read from
    cfb: Cfb,
    /// references listed in the project `dir` stream
    references: Vec<Reference>,
    /// decompressed module contents, keyed by module name
    modules: HashMap<String, Vec<u8>>,
}

impl VbaProject {

    /// Create a new `VbaProject` out of the vbaProject.bin `ZipFile` or xls file
    ///
    /// Starts reading project metadata (header, directories, sectors and minisectors).
    ///
    /// `r` and `len` are handed to `Cfb::new`; the same reader is then used to load
    /// the project content through `from_cfb`.
    ///
    /// # Errors
    ///
    /// Fails if the compound file cannot be read or if `from_cfb` fails.
    pub fn new<R: Read>(r: &mut R, len: usize) -> Result<VbaProject> {
        let cfb = try!(Cfb::new(r, len));
        VbaProject::from_cfb(r, cfb)
    }

    /// Creates a new `VbaProject` out of a Compound File Binary and the corresponding reader
    ///
    /// Reads the references and module metadata from the `dir` stream, then loads and
    /// decompresses every module so that later lookups need no further I/O.
    ///
    /// # Errors
    ///
    /// Fails if the `dir` stream or a module stream cannot be read or decompressed, or
    /// if a module declares a text offset lying beyond the end of its stream.
    pub fn from_cfb<R: Read>(r: &mut R, mut cfb: Cfb) -> Result<VbaProject> {
        let (refs, mods) = try!(read_vba(&mut cfb, r));

        // read all modules
        let mut modules = HashMap::with_capacity(mods.len());
        for m in mods {
            let raw = try!(cfb.get_stream(&m.stream_name, r));

            // the offset comes straight from the file: reject it rather than panic
            // on an out of range slice
            if m.text_offset > raw.len() {
                return Err(format!("invalid text offset {} for module {} (stream length: {})",
                                   m.text_offset, m.name, raw.len()).into());
            }

            let code = try!(::cfb::decompress_stream(&raw[m.text_offset..]));
            modules.insert(m.name, code);
        }

        Ok(VbaProject {
            cfb: cfb,
            references: refs,
            modules: modules,
        })
    }

    /// Gets the list of `Reference`s
    pub fn get_references(&self) -> &[Reference] {
        &self.references
    }

    /// Gets the list of `Module` names
    ///
    /// Names come from a `HashMap`, so their order is unspecified.
    pub fn get_module_names(&self) -> Vec<&str> {
        self.modules.keys().map(|k| &**k).collect()
    }

    /// Reads module content and tries to convert to utf8
    ///
    /// While it works most of the time, the modules are MBCS encoded and the conversion
    /// may fail. If this is the case you should revert to `get_module_raw` as there is
    /// no built in decoding provided in this crate
    ///
    /// # Errors
    ///
    /// Fails if no module is named `name` or if its content is not valid utf8.
    ///
    /// # Examples
    /// ```
    /// use office::Excel;
    ///
    /// # let path = format!("{}/tests/vba.xlsm", env!("CARGO_MANIFEST_DIR"));
    /// let mut xl = Excel::open(path).expect("Cannot find excel file");
    /// let mut vba = xl.vba_project().expect("Cannot find vba project");
    /// let vba = vba.to_mut();
    /// let modules = vba.get_module_names().into_iter()
    ///                  .map(|s| s.to_string()).collect::<Vec<_>>();
    /// for m in modules {
    ///     println!("Module {}:", m);
    ///     println!("{}", vba.get_module(&m)
    ///                       .expect(&format!("cannot read {:?} module", m)));
    /// }
    /// ```
    pub fn get_module(&mut self, name: &str) -> Result<String> {
        debug!("read module {}", name);
        let data = try!(self.get_module_raw(name));
        let data = try!(::std::str::from_utf8(data)).to_string();
        Ok(data)
    }

    /// Reads module content (MBCS encoded) and output it as-is (binary output)
    ///
    /// # Errors
    ///
    /// Fails if no module is named `name`.
    pub fn get_module_raw(&mut self, name: &str) -> Result<&[u8]> {
        match self.modules.get(name) {
            Some(m) => Ok(&**m),
            None => Err(format!("Cannot find module {}", name).into()),
        }
    }

}

/// A vba reference
///
/// Filled in by `read_references` from the REFERENCE records of the `dir` stream.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Reference {
    /// name, as read from the REFERENCENAME record
    pub name: String,
    /// description: starts as a copy of `name` and is replaced by the last `#`-separated
    /// part of the libid when a control or registered reference record follows
    pub description: String,
    /// location of the reference: starts as `/` and is replaced by the path found in the
    /// libid (control/registered reference) or by the absolute libid (project reference)
    pub path: PathBuf,
}

impl Reference {
    /// Tells whether the referenced location cannot be found on this machine
    ///
    /// An empty `path` is never reported as missing; any other path is missing
    /// when it does not exist on the filesystem.
    pub fn is_missing(&self) -> bool {
        if self.path.as_os_str().is_empty() {
            return false;
        }
        !self.path.exists()
    }
}

/// A vba module
///
/// Metadata only: produced by `read_modules` and consumed by `VbaProject::from_cfb`
/// to locate and decompress the module content.
#[derive(Debug, Clone, Default)]
struct Module {
    /// module name as it appears in vba project
    name: String,
    /// name of the compound file stream holding the module
    stream_name: String,
    /// byte offset, within that stream, at which the compressed module text starts
    text_offset: usize,
}

/// Loads the `dir` stream of the project and extracts its references and module metadata
///
/// The stream is fetched from `cfb` through the reader `r`, decompressed, then parsed
/// front to back: dir information (skipped), references, modules.
fn read_vba<R: Read>(cfb: &mut Cfb, r: &mut R) -> Result<(Vec<Reference>, Vec<Module>)> {
    debug!("read vba");

    // fetch and decompress the dir stream
    let compressed = try!(cfb.get_stream("dir", r));
    let dir = try!(::cfb::decompress_stream(&*compressed));

    // each parser consumes its own section and leaves the cursor on the next one
    let mut cursor: &[u8] = &*dir;

    // dir information record (content not used)
    try!(read_dir_information(&mut cursor));

    // array of REFERENCE records
    let references = try!(read_references(&mut cursor));

    // module records
    let modules = try!(read_modules(&mut cursor));

    Ok((references, modules))
}

/// Consumes the dir information records found at the start of the `dir` stream
///
/// Nothing is extracted: record ids are checked and `stream` is advanced so that it
/// points at the first record following PROJECTCONSTANTS.
///
/// # Errors
///
/// Fails if a record id is not the expected one or if the stream ends before a
/// fixed-size section could be skipped.
fn read_dir_information(stream: &mut &[u8]) -> Result<()> {
    /// Advances `stream` by `n` bytes, failing when fewer than `n` bytes are left
    fn skip(stream: &mut &[u8], n: usize) -> Result<()> {
        if stream.len() < n {
            return Err(format!("unexpected end of dir stream: cannot skip {} bytes, {} left",
                               n, stream.len()).into());
        }
        *stream = &stream[n..];
        Ok(())
    }

    debug!("read dir header");

    // PROJECTSYSKIND, PROJECTLCID and PROJECTLCIDINVOKE Records
    try!(skip(stream, 38));

    // PROJECTNAME Record
    try!(check_variable_record(0x0004, stream));

    // PROJECTDOCSTRING Record
    try!(check_variable_record(0x0005, stream));
    try!(check_variable_record(0x0040, stream)); // unicode

    // PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7
    try!(check_variable_record(0x0006, stream));
    try!(check_variable_record(0x003D, stream));

    // PROJECTHELPCONTEXT PROJECTLIBFLAGS and PROJECTVERSION Records
    try!(skip(stream, 32));

    // PROJECTCONSTANTS Record
    try!(check_variable_record(0x000C, stream));
    try!(check_variable_record(0x003C, stream)); // unicode

    Ok(())
}

/// Reads the array of REFERENCE records of the `dir` stream
///
/// Records are read until the terminator id `0x000F`. A REFERENCENAME record starts a
/// new `Reference`; the records that follow it complete its description and path. A
/// reference is only kept once it has a non-empty name.
///
/// On success `stream` points just after the terminator id.
///
/// # Errors
///
/// Fails on an unknown record id, on text that is not valid utf8, or if the stream
/// ends before a fixed-size field could be skipped.
fn read_references(stream: &mut &[u8]) -> Result<Vec<Reference>> {
    /// Advances `stream` by `n` bytes, failing when fewer than `n` bytes are left
    fn skip(stream: &mut &[u8], n: usize) -> Result<()> {
        if stream.len() < n {
            return Err(format!("unexpected end of dir stream: cannot skip {} bytes, {} left",
                               n, stream.len()).into());
        }
        *stream = &stream[n..];
        Ok(())
    }

    /// Fills description and path from a `#`-separated libid: the last part is the
    /// description, the one before it (if any) is the path
    fn set_module_from_libid(reference: &mut Reference, libid: &[u8])
        -> Result<()>
    {
        let libid = try!(::std::str::from_utf8(libid));
        let mut parts = libid.split('#').rev();
        if let Some(p) = parts.next() {
            reference.description = p.to_string();
        }
        if let Some(p) = parts.next() {
            reference.path = p.into();
        }
        Ok(())
    }

    debug!("read all references metadata");

    let mut references = Vec::new();

    // placeholder with an empty name: it is never pushed to `references`
    let mut reference = Reference {
        name: "".to_string(),
        description: "".to_string(),
        path: "/".into()
    };

    loop {

        let check = stream.read_u16::<LittleEndian>();
        match try!(check) {
            0x000F => { // termination of references array
                if !reference.name.is_empty() { references.push(reference); }
                break;
            },

            0x0016 => { // REFERENCENAME
                // a new name closes the reference being built
                if !reference.name.is_empty() { references.push(reference); }

                let name = try!(read_variable_record(stream, 1));
                let name = try!(::std::string::String::from_utf8(name.to_vec()));
                reference = Reference {
                    name: name.clone(),
                    description: name.clone(),
                    path: "/".into(),
                };

                try!(check_variable_record(0x003E, stream)); // unicode
            },

            0x0033 => { // REFERENCEORIGINAL (followed by REFERENCECONTROL)
                try!(read_variable_record(stream, 1));
            },

            0x002F => { // REFERENCECONTROL
                try!(skip(stream, 4)); // len of total ref control

                let libid = try!(read_variable_record(stream, 1)); //libid twiddled
                try!(set_module_from_libid(&mut reference, libid));

                try!(skip(stream, 6));

                match try!(stream.read_u16::<LittleEndian>()) {
                    0x0016 => { // optional name record extended
                        try!(read_variable_record(stream, 1)); // name extended
                        try!(check_variable_record(0x003E, stream)); // name extended unicode
                        try!(check_record(0x0030, stream));
                    },
                    0x0030 => (),
                    e => return Err(format!( "unexpected token in reference control {:x}", e).into()),
                }
                try!(skip(stream, 4));
                try!(read_variable_record(stream, 1)); // libid extended
                try!(skip(stream, 26));
            },

            0x000D => { // REFERENCEREGISTERED
                try!(skip(stream, 4));

                let libid = try!(read_variable_record(stream, 1)); // libid registered
                try!(set_module_from_libid(&mut reference, libid));

                try!(skip(stream, 6));
            },

            0x000E => { // REFERENCEPROJECT
                try!(skip(stream, 4));
                let absolute = try!(read_variable_record(stream, 1)); // project libid absolute
                {
                    let absolute = try!(::std::str::from_utf8(absolute));
                    // drop the leading `*\C` marker when present
                    reference.path = if absolute.starts_with("*\\C") {
                        absolute[3..].into()
                    } else {
                        absolute.into()
                    };
                }
                try!(read_variable_record(stream, 1)); // project libid relative
                try!(skip(stream, 6));
            },
            c => return Err(format!("invalid of unknown check Id {}", c).into()),
        }
    }

    Ok(references)
}

/// Reads the module records of the `dir` stream
///
/// `stream` must point just after the id that terminated the references array. The
/// module count is read first, then for each module its name, stream name and text
/// offset are collected; every other field is checked and skipped.
///
/// # Errors
///
/// Fails on an unexpected record id, an unknown module type, text that is not valid
/// utf8, or if the stream ends before a fixed-size field could be skipped.
fn read_modules(stream: &mut &[u8]) -> Result<Vec<Module>> {
    /// Advances `stream` by `n` bytes, failing when fewer than `n` bytes are left
    fn skip(stream: &mut &[u8], n: usize) -> Result<()> {
        if stream.len() < n {
            return Err(format!("unexpected end of dir stream: cannot skip {} bytes, {} left",
                               n, stream.len()).into());
        }
        *stream = &stream[n..];
        Ok(())
    }

    debug!("read all modules metadata");
    try!(skip(stream, 4));

    let module_len = try!(stream.read_u16::<LittleEndian>()) as usize;

    try!(skip(stream, 8)); // PROJECTCOOKIE record
    let mut modules = Vec::with_capacity(module_len);

    for _ in 0..module_len {

        // name
        let name = try!(check_variable_record(0x0019, stream));
        let name = try!(::std::string::String::from_utf8(name.to_vec()));

        try!(check_variable_record(0x0047, stream));      // unicode

        let stream_name = try!(check_variable_record(0x001A, stream)); // stream name
        let stream_name = try!(::std::string::String::from_utf8(stream_name.to_vec()));

        try!(check_variable_record(0x0032, stream));      // stream name unicode
        try!(check_variable_record(0x001C, stream));      // doc string
        try!(check_variable_record(0x0048, stream));      // doc string unicode

        // offset
        try!(check_record(0x0031, stream));
        try!(skip(stream, 4));
        let offset = try!(stream.read_u32::<LittleEndian>()) as usize;

        // help context
        try!(check_record(0x001E, stream));
        try!(skip(stream, 8));

        // cookie
        try!(check_record(0x002C, stream));
        try!(skip(stream, 6));

        match try!(stream.read_u16::<LittleEndian>()) {
            0x0021 /* procedural module */ |
            0x0022 /* document, class or designer module */ => (),
            e => return Err(format!("unknown module type {}", e).into()),
        }

        // optional flags: each iteration skips the reserved bytes of the previous
        // record, then reads the next id until the terminator 0x002B is found
        loop {
            try!(skip(stream, 4)); // reserved
            match try!(stream.read_u16::<LittleEndian>()) {
                0x0025 /* readonly */ | 0x0028 /* private */ => (),
                0x002B => break,
                e => return Err(format!("unknown record id {}", e).into()),
            }
        }
        try!(skip(stream, 4)); // reserved

        modules.push(Module {
            name: name,
            stream_name: stream_name,
            text_offset: offset,
        });
    }

    Ok(modules)
}

/// Reads a variable length record
///
/// The record starts with a little endian `u32` length, followed by that many items.
/// `mult` is a multiplier of the length (e.g 2 when parsing XLWideString)
///
/// Returns the record content and advances `r` just past it.
///
/// # Errors
///
/// Fails if the length cannot be read, or if the declared size (length times `mult`)
/// overflows or exceeds the number of bytes left in `r`.
fn read_variable_record<'a>(r: &mut &'a[u8], mult: usize) -> Result<&'a[u8]> {
    let count = try!(r.read_u32::<LittleEndian>()) as usize;

    // the length comes from the file: validate it, `split_at` would panic otherwise
    let len = match count.checked_mul(mult) {
        Some(len) if len <= r.len() => len,
        _ => return Err(format!("invalid variable record length {} (multiplier: {}), \
                                 only {} bytes left", count, mult, r.len()).into()),
    };

    let (read, next) = r.split_at(len);
    *r = next;
    Ok(read)
}

/// Expects a record with the given `id` and returns its variable length content
///
/// The id is validated with `check_record`, then the content is read with a length
/// multiplier of 1. A warning is logged when the content is larger than 100 000 bytes.
fn check_variable_record<'a>(id: u16, r: &mut &'a[u8]) -> Result<&'a[u8]> {
    try!(check_record(id, r));

    let payload = try!(read_variable_record(r, 1));
    let size = payload.len();

    if log_enabled!(LogLevel::Warn) && size > 100_000 {
        warn!("record id {} as a suspicious huge length of {} (hex: {:x})", 
              id, size, size as u32);
    }

    Ok(payload)
}

/// Reads the next little endian `u16` from `r` and verifies that it equals `id`
///
/// The two id bytes are consumed whether or not they match.
fn check_record(id: u16, r: &mut &[u8]) -> Result<()> {
    debug!("check record {:x}", id);

    let found = try!(r.read_u16::<LittleEndian>());
    if found == id {
        return Ok(());
    }

    Err(format!("invalid record id, found {:x}, expecting {:x}", found, id).into())
}