asm-lsp 0.3.1

Assembly Language Server
Documentation
use crate::types::*;

use anyhow::anyhow;
use log::debug;
use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event;
use quick_xml::Reader;
use regex::Regex;
use reqwest;
use std::collections::HashMap;
use std::str;
use std::str::FromStr;

/// Parse the provided XML contents and return a vector of all the instrucitons based on that.
/// If parsing fails, the appropriate error will be returned instead.
///
/// Current function assumes that the XML file is already read and that it's been given a reference
/// to its contents (`&str`).
pub fn populate_instructions(xml_contents: &str) -> anyhow::Result<Vec<Instruction>> {
    // initialisation -----------------------------------------------------------------------------
    let mut buf = Vec::new();

    // initialise the instruction set
    let mut instructions_map = HashMap::<String, Instruction>::new();

    // iterate through the XML --------------------------------------------------------------------
    let mut reader = Reader::from_str(xml_contents);
    reader.trim_text(true);

    // ref to the instruction that's currently under construction
    let mut curr_instruction = Instruction::default();
    let mut curr_instruction_form = InstructionForm::default();

    debug!("Parsing XML contents...");
    loop {
        match reader.read_event(&mut buf) {
            // start event ------------------------------------------------------------------------
            Ok(Event::Start(ref e)) => {
                match e.name() {
                    b"Instruction" => {
                        // start of a new instruction
                        curr_instruction = Instruction::default();

                        // iterate over the attributes
                        for attr in e.attributes() {
                            let Attribute { key, value } = attr.unwrap();
                            match str::from_utf8(key).unwrap() {
                                "name" => unsafe {
                                    curr_instruction.name =
                                        String::from(str::from_utf8_unchecked(&value));
                                },
                                "summary" => unsafe {
                                    curr_instruction.summary =
                                        String::from(str::from_utf8_unchecked(&value));
                                },
                                _ => {}
                            }
                        }
                    }
                    b"InstructionForm" => {
                        // Read the attributes
                        //
                        // <xs:attribute name="gas-name" type="xs:string" use="required" />
                        // <xs:attribute name="go-name" type="xs:string" />
                        // <xs:attribute name="mmx-mode" type="MMXMode" />
                        // <xs:attribute name="xmm-mode" type="XMMMode" />
                        // <xs:attribute name="cancelling-inputs" type="xs:boolean" />
                        // <xs:attribute name="nacl-version" type="NaClVersion" />
                        // <xs:attribute name="nacl-zero-extends-outputs" type="xs:boolean" />

                        // new instruction
                        curr_instruction_form = InstructionForm::default();

                        // iterate over the attributes
                        for attr in e.attributes() {
                            let Attribute { key, value } = attr.unwrap();
                            match str::from_utf8(key).unwrap() {
                                "gas-name" => unsafe {
                                    curr_instruction_form.gas_name =
                                        Some(String::from(str::from_utf8_unchecked(&value)));
                                },
                                "go-name" => unsafe {
                                    curr_instruction_form.go_name =
                                        Some(String::from(str::from_utf8_unchecked(&value)));
                                },
                                "mmx-mode" => unsafe {
                                    let value_ = value.as_ref();
                                    curr_instruction_form.mmx_mode =
                                        Some(MMXMode::from_str(str::from_utf8_unchecked(value_))?);
                                },
                                "xmm-mode" => unsafe {
                                    let value_ = value.as_ref();
                                    curr_instruction_form.xmm_mode =
                                        Some(XMMMode::from_str(str::from_utf8_unchecked(value_))?);
                                },
                                "cancelling-inputs" => match str::from_utf8(&value).unwrap() {
                                    "true" => curr_instruction_form.cancelling_inputs = Some(true),
                                    "false" => {
                                        curr_instruction_form.cancelling_inputs = Some(false)
                                    }
                                    _ => {
                                        return Err(anyhow!(
                                            "Unknown value for XML attribute {}",
                                            "nacl-zero-extends-outputs"
                                        ))
                                    }
                                },
                                "nacl-version" => {
                                    curr_instruction_form.nacl_version =
                                        value.as_ref().first().cloned();
                                }
                                "nacl-zero-extends-outputs" => {
                                    match str::from_utf8(&value).unwrap() {
                                        "true" => {
                                            curr_instruction_form.nacl_zero_extends_outputs =
                                                Some(true)
                                        }
                                        "false" => {
                                            curr_instruction_form.nacl_zero_extends_outputs =
                                                Some(false)
                                        }
                                        _ => {
                                            return Err(anyhow!(
                                                "Unknown value for XML attribute {}",
                                                "nacl-zero-extends-outputs"
                                            ))
                                        }
                                    }
                                }
                                _ => {}
                            }
                        }
                    }
                    b"Encoding" => {} // TODO
                    _ => (),          // unknown event
                }
            }
            Ok(Event::Empty(ref e)) => {
                match e.name() {
                    b"ISA" => {
                        for attr in e.attributes() {
                            let Attribute { key, value } = attr.unwrap();
                            if str::from_utf8(key).unwrap() == "id" {
                                unsafe {
                                    curr_instruction_form.isa = Some(
                                        ISA::from_str(str::from_utf8_unchecked(value.as_ref()))
                                            .unwrap_or_else(|_| {
                                                panic!(
                                                    "Unexpected ISA variant - {}",
                                                    str::from_utf8_unchecked(&value)
                                                )
                                            }),
                                    )
                                }
                            }
                        }
                    }
                    b"Operand" => {
                        let mut type_ = OperandType::k; // dummy initialisation
                        let mut extended_size = None;
                        let mut input = None;
                        let mut output = None;

                        for attr in e.attributes() {
                            let Attribute { key, value } = attr.unwrap();
                            match str::from_utf8(key).unwrap() {
                                "type" => {
                                    type_ = OperandType::from_str(str::from_utf8(&value)?)?;
                                }
                                "input" => match str::from_utf8(&value).unwrap() {
                                    "true" => input = Some(true),
                                    "false" => input = Some(false),
                                    _ => return Err(anyhow!("Unknown value for operand type")),
                                },
                                "output" => match str::from_utf8(&value).unwrap() {
                                    "true" => output = Some(true),
                                    "false" => output = Some(false),
                                    _ => return Err(anyhow!("Unknown value for operand type")),
                                },
                                "extended-size" => {
                                    extended_size = Some(
                                        str::from_utf8(value.as_ref()).unwrap().parse::<usize>()?,
                                    );
                                }
                                _ => (), // unknown event
                            }
                        }

                        curr_instruction_form.operands.push(Operand {
                            type_,
                            extended_size,
                            input,
                            output,
                        })
                    }
                    _ => (), // unknown event
                }
            }
            // end event --------------------------------------------------------------------------
            Ok(Event::End(ref e)) => {
                match e.name() {
                    b"Instruction" => {
                        // finish instruction
                        instructions_map
                            .insert(curr_instruction.name.clone(), curr_instruction.clone());
                    }
                    b"InstructionForm" => {
                        curr_instruction.push_form(curr_instruction_form.clone());
                    }
                    _ => (), // unknown event
                }
            }
            Ok(Event::Eof) => break,
            Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
            _ => (), // rest of events that we don't consider
        }
    }

    // provide a URL example page -----------------------------------------------------------------
    // parse this x86 page, grab the contents of the table + the URLs they are referring to
    let x86_online_docs = String::from("https://www.felixcloutier.com/x86/");
    debug!(
        "Fetching further documentation from the web -> {}...",
        x86_online_docs
    );
    let body = reqwest::blocking::get(&x86_online_docs)?.text()?;

    // skip first line
    let body_it = body.split("<td>").skip(1).step_by(2);

    // Regex to match:
    // <a href="./VSCATTERPF1DPS:VSCATTERPF1QPS:VSCATTERPF1DPD:VSCATTERPF1QPD.html">VSCATTERPF1QPS</a></td>
    //
    // let re = Regex::new(r"<a href=\"./(.*)">(.*)</a></td>")?;
    let re = Regex::new(r#"<a href="\./(.*?\.html)">(.*?)</a>.*</td>"#)?;
    for line in body_it {
        // take it step by step.. match a small portion of the line first...
        let caps = re.captures(line).unwrap();
        let url_suffix = caps.get(1).map_or("", |m| m.as_str());
        let instruction_name = caps.get(2).map_or("", |m| m.as_str());

        // add URL to the corresponding instruction
        match instructions_map.get_mut(instruction_name) {
            None => (), // key not found
            Some(instruction) => instruction.url = Some(x86_online_docs.clone() + url_suffix),
        }
    }

    Ok(instructions_map.into_values().collect())
}

pub fn populate_name_to_instruction_map<'instruction>(
    instructions: &'instruction Vec<Instruction>,
    names_to_instructions: &mut NameToInstructionMap<'instruction>,
) {
    for instruction in instructions {
        for name in &instruction.get_associated_names() {
            names_to_instructions.insert(name, instruction);
        }
    }
}