riptc 0.1.7

Rust implementation of the InertiaJS protocol compatible with `riptc` for generating strong TypeScript bindings.
//! Arguably this is one of the most important and most sensitive modules in this entire compiler.
//!
//! Here, you will find our entire serde compatibility layer code that's responsible for taking in
//! a container, field, or variant, extracting all serde attributes that are relevant to us, and then
//! applying these serde attributes over the input in a way that is _directly compatible with how serde
//! would serialize it_. Generally, this is fairly easy to do because serde derive macros and field attributes
//! are 100% deterministic, and we know which ones we can't support so we error out compilation in that case.
//!
//! Considering serde is at the end of the day just a derive macro, the single largest limitation here is that
//! anyone, at any time, can implement serialize / deserialize manually. If that happens, there's really nothing
//! that we can reasonably do about it. For this situation, we have an option in the `kind.toml` to add type overrides
//! where the user can say "i know you can't statically analyze this, but this is what this type actually is". More or
//! less this is our equivalent of unsafe. I would eventually like an option to make the configuration able to error out
//! on manual serde implementations that don't have a corresponding manual entry, but it is what it is.

// TODO(@lazkindness): add some tests here that make sure that types we are generating
// match up to what serde would serialize them as

use std::borrow::Cow;

use heck::{ToKebabCase, ToLowerCamelCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use rustc_ast::{
    token::{Delimiter, LitKind, TokenKind},
    tokenstream::{TokenStream, TokenTree},
};
use rustc_errors::DiagCtxtHandle;
use rustc_hash::FxHashSet;
use rustc_hir::{AttrArgs, AttrKind, Attribute, def_id::DefId};
use rustc_middle::ty::TyCtxt;
use rustc_span::Symbol;

// TODO(@lazkindness): detect custom serde crate and warn them of incompatibility
// TODO(@lazkindness): detect custom serde impl on serialize / deserialize and require a doc comment
// that we can detect to state what the underlying type is

/// A single serde attribute that influences the TypeScript type we generate.
///
/// Values are stored in a hash set (see `SerdeFieldAttrs`), which is why this type
/// derives `Eq` and `Hash`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SerdeAttr {
    /// `#[serde(flatten)]`. During the conversion of a rust type to a typescript
    /// type, the resolved rust adt should be applied directly to the typescript
    /// type instead of being looked up via a typescript reference value.
    Flatten,
    /// `#[serde(skip)]`. The adt field is skipped entirely.
    Skip,
    /// `#[serde(default)]` (bare or with a path). Falls back to a default value,
    /// indicating that the value should be elevated to an option.
    Default,
    /// `#[serde(rename = "...")]`. Changes the identifier used to cross the
    /// serde bridge.
    Rename(Symbol),
    /// `#[serde(rename_all = "...")]`. Applies to a container (either a struct
    /// container or an enum variant that itself contains struct fields). The
    /// symbol holds the rule name exactly as written in the attribute.
    RenameAll(Symbol),
    /// `#[serde(tag = "...")]`. For enums, sets the key under which the variant
    /// tag is stored in the serialized data. When this is not a unit enum, every
    /// variant (with or without data) is represented in ts as a type with a
    /// type field and its contents unfurled into that type.
    Tag(Symbol),
    /// `#[serde(content = "...")]`. For enums using `#[serde(tag, content)]`,
    /// sets the key that holds the payload, instead of the default behavior
    /// where the payload is flattened next to the tag.
    Content(Symbol),
}

/// The set of serde attrs to apply to a field. For simplicity, this holds the attrs written inline on the
/// field _and_ the attrs of its container that are applicable to that field. It mostly exists to provide
/// convenience methods that take a field, transform it into whatever it needs to be, and return the result.
///
/// Backed by a hash set, so duplicate attrs collapse and iteration order is unspecified.
#[derive(Debug)]
pub struct SerdeFieldAttrs(FxHashSet<SerdeAttr>);

impl SerdeFieldAttrs {
    /// `true` when the set contains [`SerdeAttr::Skip`].
    pub fn skip(&self) -> bool {
        self.0.contains(&SerdeAttr::Skip)
    }

    /// `true` when the set contains [`SerdeAttr::Default`].
    pub fn default(&self) -> bool {
        self.0.contains(&SerdeAttr::Default)
    }

    /// `true` when the set contains [`SerdeAttr::Flatten`].
    pub fn flatten(&self) -> bool {
        self.0.contains(&SerdeAttr::Flatten)
    }

    /// Resolves the serialized name of a field.
    ///
    /// An explicit `rename` takes precedence over a `rename_all` rule; when neither
    /// is present the given name is returned unchanged.
    pub fn rename_field<'s>(&'s self, field_name: impl Into<Cow<'s, str>>) -> Cow<'s, str> {
        // an explicit rename wins outright, the original name is not even looked at
        let explicit = self.0.iter().find_map(|attr| match attr {
            SerdeAttr::Rename(sym) => Some(sym.as_str()),
            _ => None,
        });
        if let Some(name) = explicit {
            return Cow::Borrowed(name);
        }

        // otherwise fall back to a rename_all rule, if there is one
        let rule = self.0.iter().find_map(|attr| match attr {
            SerdeAttr::RenameAll(rule) => Some(rule.as_str()),
            _ => None,
        });
        match rule {
            Some(rule) => apply_rename_all(field_name.into(), rule),
            None => field_name.into(),
        }
    }

    /// Name of the tag. Lack of a tag name means the variant name itself
    /// should become the container for all of its contents.
    pub fn tag_name(&self) -> Option<&str> {
        self.0.iter().find_map(|attr| match attr {
            SerdeAttr::Tag(sym) => Some(sym.as_str()),
            _ => None,
        })
    }

    /// Name of the content key in a tagged enum. If this is `None`, the contents
    /// are flattened into the enum alongside the tag.
    pub fn content_name(&self) -> Option<&str> {
        self.0.iter().find_map(|attr| match attr {
            SerdeAttr::Content(sym) => Some(sym.as_str()),
            _ => None,
        })
    }
}

/// Serde attrs that sit over a specific field. This does not include container attributes like
/// rename_all or, for enums, tag + content.
pub fn serde_attrs_for_field_def(tcx: TyCtxt<'_>, field_def_did: DefId) -> SerdeFieldAttrs {
    let attrs = tcx
        .get_attrs(field_def_did, Symbol::intern("serde"))
        .filter_map(|a| extract_serde_attrs(tcx.dcx(), a))
        .flatten();

    let container_attrs = tcx
        .get_attrs(tcx.parent(field_def_did), Symbol::intern("serde"))
        .filter_map(|a| extract_serde_attrs(tcx.dcx(), a))
        .flatten();

    SerdeFieldAttrs(attrs.chain(container_attrs).collect())
}

// Pulls the serde attrs out of one regular attribute.
//
// Returns `None` for anything that is not a normal, single-segment `serde` attribute
// and for a `serde` attribute whose delimiter is not a parenthesis. A `serde`
// attribute without delimited arguments is reported as an error before returning `None`.
fn extract_serde_attrs(
    dcx: DiagCtxtHandle<'_>,
    attr: &Attribute,
) -> Option<impl Iterator<Item = SerdeAttr>> {
    let AttrKind::Normal(item) = &attr.kind else {
        return None;
    };

    // only a path that is exactly `serde` is of interest
    let segments = &item.path.segments;
    if segments.len() != 1 || segments[0].name != Symbol::intern("serde") {
        return None;
    }

    if let AttrArgs::Delimited(args) = &item.args {
        if matches!(args.delim, Delimiter::Parenthesis) {
            return Some(parse_serde_meta_list(dcx, &args.tokens));
        }
        return None;
    }

    dcx.span_err(
        attr.span,
        "serde attributes must be in the form of `#[serde(...)]`",
    );
    None
}

/// Applies a serde `rename_all` rule to `original` and returns the converted name.
///
/// All eight rule names serde accepts are handled: `lowercase`, `UPPERCASE`,
/// `PascalCase`, `camelCase`, `snake_case`, `SCREAMING_SNAKE_CASE`, `kebab-case`
/// and `SCREAMING-KEBAB-CASE`. Any other rule leaves the name untouched.
///
/// NOTE(review): `lowercase` lowercases the whole identifier. That matches serde for
/// enum variants and for snake_case field names; serde leaves a field name unchanged
/// under `lowercase`, so a field that contains uppercase letters would differ.
fn apply_rename_all<'s>(original: Cow<'s, str>, rename_all: &str) -> Cow<'s, str> {
    match rename_all {
        "lowercase" => Cow::Owned(original.to_ascii_lowercase()),
        "UPPERCASE" => Cow::Owned(original.to_ascii_uppercase()),
        "snake_case" => Cow::Owned(original.to_snake_case()),
        "SCREAMING_SNAKE_CASE" => Cow::Owned(original.to_shouty_snake_case()),
        "kebab-case" => Cow::Owned(original.to_kebab_case()),
        // kebab-case output is lowercase words joined by `-`, so uppercasing it
        // yields the screaming variant without needing another heck trait in scope
        "SCREAMING-KEBAB-CASE" => Cow::Owned(original.to_kebab_case().to_uppercase()),
        "camelCase" => Cow::Owned(original.to_lower_camel_case()),
        "PascalCase" => Cow::Owned(original.to_upper_camel_case()),
        // TODO(@lazkindness): this should probably be a hard error, but serde itself
        // rejects unknown rename rules so we should never see one here
        _ => original,
    }
}

/// Parses the token stream of a single `#[serde(...)]` attribute into zero or more `SerdeAttr`s.
///
/// The stream is split on top-level commas and each piece is parsed on its own, since one
/// rust attribute can carry many serde attrs. Pieces that yield nothing are dropped. Parsing
/// happens eagerly, before the iterator is returned.
fn parse_serde_meta_list(
    dcx: DiagCtxtHandle<'_>,
    tokens: &TokenStream,
) -> impl Iterator<Item = SerdeAttr> {
    let parsed: Vec<SerdeAttr> = split_by_comma(tokens)
        .into_iter()
        .filter_map(|chunk| parse_meta_item(dcx, &chunk))
        .collect();

    parsed.into_iter()
}

// TODO(@lazkindness): figure out a better way to do this that doesn't clone tokens.
// also, i noticed something called `AttrTokenTree` / `AttrTokenStream` that may be useful,
// but theres more docs on these so i'll stick with what i can find :shrug:
/// Splits the top-level tokens of `stream` on commas.
///
/// Comma tokens are dropped and never produce empty chunks, so leading, trailing and
/// doubled commas are harmless. A delimited group is copied into the current chunk as a
/// single tree; its contents are not inspected or split. This is a naive approach, but
/// good enough for typical serde usage. Please file an issue if it turns out not to be.
fn split_by_comma(stream: &TokenStream) -> Vec<TokenStream> {
    let mut chunks = Vec::new();
    let mut pending = Vec::new();

    for tree in stream.iter() {
        let is_comma = matches!(
            tree,
            TokenTree::Token(token, _) if matches!(token.kind, TokenKind::Comma)
        );

        if !is_comma {
            // plain tokens and whole delimited groups are carried over untouched
            pending.push(tree.clone());
            continue;
        }

        // a comma closes the chunk collected so far
        if !pending.is_empty() {
            chunks.push(TokenStream::new(std::mem::take(&mut pending)));
        }
    }

    // whatever follows the last comma is the final chunk
    if !pending.is_empty() {
        chunks.push(TokenStream::new(pending));
    }

    chunks
}

/// Parses one comma-separated chunk of a `#[serde(...)]` list into a `SerdeAttr`.
///
/// Two shapes are understood: a bare identifier (`skip`) and `ident = "string literal"`
/// (`rename = "x"`). `None` is returned when the chunk does not start with an identifier,
/// when the `=` is not followed by a plain token, and for `deserialize_with`. An
/// unrecognized key or a value that is not a string literal is reported through `dcx`
/// and also yields `None`.
fn parse_meta_item(dcx: DiagCtxtHandle<'_>, chunk: &TokenStream) -> Option<SerdeAttr> {
    let mut trees = chunk.iter();

    // every supported item starts with an identifier token
    let Some(TokenTree::Token(head, _)) = trees.next() else {
        return None;
    };
    let (key, _) = head.ident()?;
    let name = key.name;

    // the token after the identifier decides which of the two shapes this is
    let followed_by_eq = matches!(
        trees.next(),
        Some(TokenTree::Token(tok, _)) if matches!(tok.kind, TokenKind::Eq)
    );

    if !followed_by_eq {
        // bare attr such as `skip` or `flatten`
        return match name.as_str() {
            "flatten" => Some(SerdeAttr::Flatten),
            "skip" => Some(SerdeAttr::Skip),
            "default" => Some(SerdeAttr::Default),
            // TODO(@lazkindness): require an override here
            "deserialize_with" => None,
            other => {
                dcx.span_err(
                    key.span,
                    format!("serde attribute `{other}` unrecognized by `riptc`, so we cannot confidently determine final type"),
                );
                None
            }
        };
    }

    // `key = value`: the value _must_ be a string literal. serde's own macro has already
    // expanded (and validated) by the time we run, so a missing value is simply ignored
    let Some(TokenTree::Token(value, _)) = trees.next() else {
        return None;
    };
    let TokenKind::Literal(rustc_ast::token::Lit {
        kind: LitKind::Str,
        symbol,
        ..
    }) = &value.kind
    else {
        dcx.span_err(
            value.span,
            "invalid serde attribute, not a string literal".to_string(),
        );
        return None;
    };
    let literal = *symbol;

    match name.as_str() {
        "rename" => Some(SerdeAttr::Rename(literal)),
        "rename_all" => Some(SerdeAttr::RenameAll(literal)),
        "tag" => Some(SerdeAttr::Tag(literal)),
        "content" => Some(SerdeAttr::Content(literal)),
        "default" => Some(SerdeAttr::Default),
        // TODO(@lazkindness): require an override here
        "deserialize_with" => None,
        other => {
            dcx.span_err(
                value.span,
                format!("serde attribute `{other}` unrecognized by `riptc`, so we cannot confidently determine final type"),
            );
            None
        }
    }
}