deserialize_xml_derive 0.2.1

Provides the procedural macro that powers the `deserialize_xml` crate.
Documentation
//! This crate contains the procedural macro powering `#[derive(DeserializeXml)]`; see the
//! `deserialize_xml` crate for documentation.
use proc_macro::TokenStream;
use proc_macro2::TokenStream as TokenStream2;
use quote::{format_ident, quote};
use syn::DataStruct;

// TODO: find better name for attribute...
/// Entry point for `#[derive(DeserializeXml)]`.
///
/// Parses the annotated item into a `syn::DeriveInput` and hands it to
/// `impl_deserialize_xml_macro`, which produces the `DeserializeXml` impl. If the input cannot be
/// parsed, the parse error is returned as a spanned compile error instead of panicking.
#[proc_macro_derive(DeserializeXml, attributes(deserialize_xml))]
pub fn deserialize_xml_derive(input: TokenStream) -> TokenStream {
    match syn::parse::<syn::DeriveInput>(input) {
        Ok(ast) => impl_deserialize_xml_macro(&ast),
        // Surface the failure as a regular diagnostic pointing at the offending tokens.
        Err(err) => err.to_compile_error().into(),
    }
}

// Output of generate_parser: the code fragments that impl_deserialize_xml_macro splices into the
// generated `from_reader` body.
// Previous implementation also returned some enum variants to be added to the parser; keeping this
// struct around in case generate_parser should once again return extra info
struct ParserCode {
    // One `match` arm per named struct field, in declaration order. Each arm fires on the start
    // element whose name matches that field's tag and stores the deserialized value in the field.
    match_body: Vec<TokenStream2>,
}

// Represents the various attributes this macro supports, i.e. the `name = value` entries that
// parse_attr recognizes inside `#[deserialize_xml(...)]` on a struct or on one of its fields.
enum Attr {
    // #[deserialize_xml(tag = "channel")] -> start parsing this upon encountering a <channel>
    // element. Holds the string literal's value; it replaces the default (lowercased) tag name.
    Tag(String),
}

// Classification of a struct field's declared type, as produced by parse_field_type. It decides
// how the generated code stores a parsed value into the field (assign, push, or wrap in Some).
enum FieldType<'a> {
    // Anything that was not recognized as `Vec<T>` or `Option<T>`.
    Basic,
    // `Vec<T>`; holds the identifier of T's (single) path segment, borrowed from the syn AST.
    Vec(&'a proc_macro2::Ident),
    // `Option<T>`; holds the identifier of T's (single) path segment, borrowed from the syn AST.
    Option(&'a proc_macro2::Ident),
}

/// Given a type T, identify whether T is a Vec, Option, or neither (in our parlance, a "Basic"
/// type). This information determines what the field assignment statement should be for this
/// member (e.g., if this is a Vec<T>, we'll need to push the result, not just set it with the `=`
/// operator).
/// NOTE: this is *extremely* rudimentary. It only operates at the _textual_ level, so if, e.g.,
/// you "spell" Vec<T> differently (e.g. maybe you've aliased Vec<T>, or are using
/// `std::vec::Vec`), this function will give a false negative. Obviously custom Vec-like types are
/// also unrecognized.
// TODO: try this non-textual approach from StackOverflow user "trent"?
// (https://stackoverflow.com/questions/55271857/how-can-i-get-the-t-from-an-optiont-when-using-syn#comment97335199_55271857):
// One way to achieve a similar effect by relying on the actual type system might be to connect
// Option<T> back to T by using a trait, for example: impl<T> MyTrait for Option<T> { type Assoc =
// T; }. Then your macro can expand to set_bar(&mut self, v: <Option<String> as MyTrait>::Assoc)
fn parse_field_type(ty: &syn::Type) -> FieldType {
    use FieldType::*;
    // I promise there's nothing exciting here; we're just laboriously traversing the syn
    // structs/enums.
    let path = match ty {
        syn::Type::Path(syn::TypePath { path, .. }) => path,
        _ => return Basic,
    };

    if path.segments.len() != 1 {
        return Basic;
    }

    let ps = match path.segments.first() {
        Some(ps @ syn::PathSegment { .. }) => ps,
        None => return Basic,
    };

    if ps.ident != "Vec" && ps.ident != "Option" {
        return Basic;
    }

    let generic_args = match &ps.arguments {
        syn::PathArguments::AngleBracketed(syn::AngleBracketedGenericArguments {
            args, ..
        }) => args,
        _ => return Basic,
    };

    if generic_args.len() != 1 {
        return Basic;
    }

    // Sounds vaguely Buddhist...
    let inner_path = match generic_args.first() {
        Some(syn::GenericArgument::Type(syn::Type::Path(syn::TypePath { path, .. }))) => path,
        _ => return Basic,
    };

    if inner_path.segments.len() != 1 {
        return Basic;
    }

    if let Some(seg) = inner_path.segments.first() {
        match ps.ident.to_string().as_str() {
            "Vec" => Vec(&seg.ident),
            "Option" => Option(&seg.ident),
            _ => unreachable!("Earlier check should ensure this is either 'Vec' or 'Option'"),
        }
    } else {
        Basic
    }
}

fn generate_parser(ds: &DataStruct) -> ParserCode {
    let mut match_body = vec![];

    match &ds.fields {
        syn::Fields::Named(named) => {
            for field in &named.named {
                // TODO: is unwrap() safe here? It *is* a named field...
                let field_name = field.ident.as_ref().unwrap();
                let field_type = parse_field_type(&field.ty);
                let mut tag_name = match field_type {
                    FieldType::Basic => field_name.to_string().to_ascii_lowercase(),
                    FieldType::Vec(t) => t.to_string().to_ascii_lowercase(),
                    FieldType::Option(_) => field_name.to_string().to_ascii_lowercase(),
                };

                // TODO: refactor this out (same logic used in impl_deserialize_xml_macro)
                for raw_attr in &field.attrs {
                    for parsed_attr in parse_attr(raw_attr) {
                        match parsed_attr {
                            // TODO: warn use of, or disallow "last one wins" behaviour?
                            Attr::Tag(s) => {
                                tag_name = s;
                            }
                        }
                    }
                }

                let assignment_stmt = match field_type {
                    // For "plain" fields, we just need a simple assignment
                    FieldType::Basic => quote! {
                        result.#field_name = DeserializeXml::from_reader(reader)?;
                    },
                    // If the associated struct field is a Vec, we need to push (not just assign)
                    FieldType::Vec(_) => quote! {
                        result.#field_name.push(DeserializeXml::from_reader(reader)?);
                    },
                    // Similarly, for Options, wrap inner value in Some
                    FieldType::Option(_) => quote! {
                        result.#field_name = Some(DeserializeXml::from_reader(reader)?);
                    },
                };

                let match_body_case = quote! {
                    (BuildingStruct, Ok(StartElement { name, .. })) if name.local_name == #tag_name && name.prefix.is_none() => {
                        #assignment_stmt
                    }
                };

                match_body.push(match_body_case);
            }
        }
        _ => panic!("#[derive(DeserializeXml)] is only implemented for named fields"), // TODO: use universal error message
    }

    ParserCode { match_body }
}

/// Attempts to parse an attribute as understood by this macro.
///
/// Returns one `Attr` per recognized `name = value` entry inside `#[deserialize_xml(...)]`.
/// Attributes that are not named `deserialize_xml` (doc comments, `#[non_exhaustive]`, other
/// macros' helper attributes, ...) are skipped without being parsed and yield an empty `Vec`.
///
/// # Panics
///
/// Panics (which surfaces as a compile error at the derive site) when a `deserialize_xml`
/// attribute
/// - cannot be parsed as a meta item,
/// - is not of the list form `#[deserialize_xml(...)]`,
/// - contains an entry that is not of the form `name = value`,
/// - uses an unknown name, or
/// - gives `tag` a value that is not a string literal.
fn parse_attr(attr: &syn::Attribute) -> Vec<Attr> {
    // Decide whether the attribute is ours *before* looking at its shape. A derive macro sees
    // every attribute on the item, so doc comments (`#[doc = "..."]`) and word attributes would
    // otherwise hit the "unknown attribute" panic below even though they are not ours.
    if !attr.path.is_ident("deserialize_xml") {
        return vec![];
    }

    let meta = match attr.parse_meta() {
        Ok(syn::Meta::List(meta)) => meta,
        Ok(meta) => panic!(
            "#[deserialize_xml(...)] encountered unknown attribute: {:?}",
            meta
        ),
        // The attribute is ours but malformed: report it rather than silently ignoring a typo.
        Err(err) => panic!(
            "#[deserialize_xml(...)]: failed to parse attribute: {}",
            err
        ),
    };

    let mut parsed_attrs = vec![];

    for nested in meta.nested {
        let nv = match nested {
            syn::NestedMeta::Meta(syn::Meta::NameValue(nv)) => nv,
            _ => panic!("#[deserialize_xml(...)]: unsupported attribute format"),
        };

        // A multi-segment name (e.g. `a::b = "..."`) stops parsing; entries seen so far are kept.
        if nv.path.segments.len() != 1 {
            return parsed_attrs;
        }

        match nv.path.segments[0].ident.to_string().as_str() {
            "tag" => match nv.lit {
                syn::Lit::Str(s) => parsed_attrs.push(Attr::Tag(s.value())),
                x => panic!(
                    "#[deserialize_xml(...)]: unrecognized value for 'tag' attribute: {:?}",
                    x
                ),
            },
            x => panic!("#[deserialize_xml(...)]: unrecognized attribute: '{}'", x),
        }
        // TODO: is it possible to have multiple "attributes" in a #[deserialize_xml(...)] tag? If
        // so, parse them all or fail if we find more than one?
    }

    parsed_attrs
}

fn impl_deserialize_xml_macro(ast: &syn::DeriveInput) -> TokenStream {
    let name = &ast.ident;

    let mut tag_name = ast.ident.to_string().to_ascii_lowercase();

    for raw_attr in &ast.attrs {
        for parsed_attr in parse_attr(raw_attr) {
            match parsed_attr {
                // TODO: warn use of, or disallow "last one wins" behaviour?
                Attr::Tag(s) => {
                    tag_name = s;
                }
            }
        }
    }

    let parser_code = match &ast.data {
        syn::Data::Struct(ds) => generate_parser(ds),
        _ => panic!("#[derive(DeserializeXml)] is only implemented for structs"),
    };

    let match_body = parser_code.match_body;
    let parser_state_enum_ident = format_ident!("ParserState{}", name);

    let gen = quote! {
        #[allow(non_camel_case_types)]
        enum #parser_state_enum_ident {
            Start,
            BuildingStruct,
        }

        impl DeserializeXml for #name {
            fn from_reader<R: ::deserialize_xml::Read>(reader: &mut ::deserialize_xml::Peekable<::deserialize_xml::xml::reader::Events<R>>) -> ::deserialize_xml::Result<Self> {
                use ::deserialize_xml::xml::reader;
                // Save ourselves some typing when spelling out enum variants
                use ::deserialize_xml::xml::reader::XmlEvent::*;
                use #parser_state_enum_ident::*;

                let mut state = Start;
                let mut result = Self::default();

                while let Some(event) = reader.peek() {
                    match (&mut state, event) {
                        (_, Err(e)) => { reader.next().unwrap()?; },
                        (_, Ok(EndDocument)) => break,
                        (Start, Ok(StartDocument { .. })) => { reader.next(); },
                        (Start, Ok(StartElement { name, .. })) if name.local_name == #tag_name => {
                            state = BuildingStruct;
                            reader.next();
                        },
                        (_, Ok(EndElement { name, .. })) if name.local_name == #tag_name => {
                            reader.next(); // consume our closing tag
                            break;
                        },
                        #(#match_body)*
                        // We'll be as permissive as possible: if we find something we don't
                        // understand, just ignore it
                        (_, Ok(_)) => { reader.next(); },
                    }
                }

                Ok(result)
            }
        }
    };

    gen.into()
}