dtd_macro/
lib.rs

1#![feature(proc_macro_diagnostic)]
2
3use std::collections::HashMap;
4use std::iter::FromIterator;
5
6use inflector::Inflector;
7use proc_macro::TokenStream;
8use proc_macro2::TokenStream as TokenStream2;
9use quote::{format_ident, quote, quote_spanned, TokenStreamExt};
10use syn::parse::{Parse, ParseStream, Result};
11use syn::{parse_macro_input, Ident, LitStr};
12
13use dtd_parser as parser;
14
15enum DefinitionsOrPath {
16    Definetions(LitStr),
17    Path(LitStr),
18}
19
20impl Parse for DefinitionsOrPath {
21    fn parse(input: ParseStream) -> Result<Self> {
22        let maybe_path: LitStr = input.parse()?;
23        let value = maybe_path.value();
24        let trimed = value.trim();
25        if trimed.starts_with("<") && trimed.ends_with(">") {
26            Ok(DefinitionsOrPath::Definetions(maybe_path))
27        } else {
28            Ok(DefinitionsOrPath::Path(maybe_path))
29        }
30    }
31}
32
/// Makes an identifier safe to emit by appending `_` when its text is a Rust
/// keyword (strict, reserved, or weak); any other identifier is returned as is.
///
/// The argument expression is evaluated exactly once and bound to a local, so
/// passing something like `format_ident!(..)` neither repeats the work nor
/// repeats any side effects of the expression.
macro_rules! safe {
    ($ident:expr) => {{
        let ident = $ident;
        match ident.to_string().as_str() {
            "as" | "break" | "const" | "continue" | "crate" | "else" | "enum" | "extern"
            | "false" | "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | "match" | "mod"
            | "move" | "mut" | "pub" | "ref" | "return" | "self" | "Self" | "static" | "struct"
            | "super" | "trait" | "true" | "type" | "unsafe" | "use" | "where" | "while"
            | "async" | "await" | "dyn" | "abstract" | "become" | "box" | "do" | "final"
            | "macro" | "override" | "priv" | "typeof" | "unsized" | "virtual" | "yield"
            | "try" | "union" => format_ident!("{}_", ident),
            _ => ident,
        }
    }};
}
47
48/// Accept dtd file path in type AsRef<Path> or Definetions startswith '<' and endswith '>'.
49///
50///     dtd!($PATH);
51///     dtd!($DEFINETIONS);
52///
53/// Example:
54///
55///     dtd!("testdata/docutils.dtd");
56///     dtd! {
57///         <!ELEMENT decoration (header?, footer?)>
58///         <!ATTLIST decoration %basic.atts;>
59///
60///         <!ELEMENT header (%body.elements;)+>
61///         <!ATTLIST header %basic.atts;>
62///
63///         <!ELEMENT footer (%body.elements;)+>
64///         <!ATTLIST footer %basic.atts;>
65///     }
66#[proc_macro]
67pub fn dtd(input: TokenStream) -> TokenStream {
68    let definitions_or_path = parse_macro_input!(input as DefinitionsOrPath);
69
70    // The warning looks like this.
71    //
72    //     warning: come on, pick a more creative name
73    //       --> src/main.rs:10:16
74    //        |
75    //     10 |     static ref FOO: String = "lazy_static".to_owned();
76    //        |                ^^^
77    /*
78    if name == "FOO" {
79        name.span()
80            .unwrap()
81            .warning("come on, pick a more creative name")
82            .emit();
83    }
84
85    // The error looks like this.
86    //
87    //     error: I can't think of a legitimate use for lazily initializing the value `()`
88    //       --> src/main.rs:10:27
89    //        |
90    //     10 |     static ref UNIT: () = ();
91    //        |                           ^^
92    if let Expr::Tuple(ref init) = init {
93        if init.elems.is_empty() {
94            init.span()
95                .unwrap()
96                .error("I can't think of a legitimate use for lazily initializing the value `()`")
97                .emit();
98            return TokenStream::new();
99        }
100    }
101    */
102    let (parsing, span) = match definitions_or_path {
103        DefinitionsOrPath::Definetions(definitions) => {
104            (parser::parse_str(&definitions.value()), definitions)
105        }
106        DefinitionsOrPath::Path(path) => {
107            let value = path.value();
108            if !value.ends_with(".dtd") {
109                path.span()
110                    .unwrap()
111                    .error(&format!(
112                        "dtd file path shold ends with `.dtd`, but got {}.",
113                        &value
114                    ))
115                    .emit();
116                return TokenStream::new();
117            }
118            let pathbuf = match std::path::PathBuf::from(value).canonicalize() {
119                Ok(absolute) => absolute,
120                Err(err) => {
121                    path.span()
122                        .unwrap()
123                        .error(&format!("invald dtd file path: `{}`.", err))
124                        .emit();
125                    return TokenStream::new();
126                }
127            };
128            if !pathbuf.exists() {
129                path.span()
130                    .unwrap()
131                    .error(&format!(
132                        "dtd file `{}` dose not exists!",
133                        pathbuf.display()
134                    ))
135                    .emit();
136                return TokenStream::new();
137            }
138            (parser::parse(pathbuf), path)
139        }
140    };
141
142    let definitions = match parsing {
143        Ok(definitions) => definitions,
144        Err(err) => {
145            span.span().unwrap().error(&err).emit();
146            return TokenStream::new();
147        }
148    };
149
150    // let mut structs = HashMap::default();
151    let mut tokens = Vec::new();
152
153    println!("{} definitions found.", definitions.len());
154
155    let mut context = Context::new();
156
157    for definition in definitions.into_iter().rev() {
158        match definition {
159            parser::ElementType::Element(element) => {
160                let struct_name = format_ident!("{}", element.name().to_pascal_case());
161                match element.category() {
162                    parser::ElementCategory::Empty => {
163                        let token = quote! {
164                            #[derive(Clone, Debug)]
165                            pub struct #struct_name;
166                        };
167                        // println!(" > EMPTY: {} {}", struct_name, token.to_string());
168                        if context.get(&struct_name).is_none() {
169                            context.insert(struct_name.clone(), token.clone());
170                            tokens.push(token);
171                        }
172                    }
173                    parser::ElementCategory::PCDATA => {
174                        // String
175                        let token = quote! {
176                            #[derive(Clone, Debug)]
177                            pub struct #struct_name(pub String);
178                        };
179                        // println!(" > PCDATA: {}", struct_name);
180                        if context.get(&struct_name).is_none() {
181                            context.insert(struct_name.clone(), token.clone());
182                            tokens.push(token);
183                        }
184                    }
185                    parser::ElementCategory::CDATA => {
186                        // String
187                        let token = quote! {
188                            #[derive(Clone, Debug)]
189                            pub struct #struct_name(pub String);
190                        };
191                        // println!(" > CDATA: {}", struct_name);
192                        if context.get(&struct_name).is_none() {
193                            context.insert(struct_name.clone(), token.clone());
194                            tokens.push(token);
195                        }
196                    }
197                    parser::ElementCategory::Any => {
198                        // Any?
199                        let token = quote! {
200                            #[derive(Clone, Debug)]
201                            pub struct #struct_name(pub Box<dyn ::std::any::Any>);
202                        };
203                        // println!(" > Any: {}", struct_name);
204                        if context.get(&struct_name).is_none() {
205                            context.insert(struct_name.clone(), token.clone());
206                            tokens.push(token);
207                        }
208                    }
209                    parser::ElementCategory::Mixed(repeatable) => {
210                        match repeatable {
211                            parser::Repeatable::Once(_) => {
212                                // No child, it is String.
213                                let token = quote! {
214                                    #[derive(Clone, Debug)]
215                                    pub struct #struct_name(pub String);
216                                };
217                                // println!(" > Mixed: {}", struct_name);
218
219                                if context.get(&struct_name).is_none() {
220                                    context.insert(struct_name.clone(), token.clone());
221                                    tokens.push(token);
222                                }
223                            }
224                            parser::Repeatable::ZeroOrManyTimes(parser::MixedPCDATA(names)) => {
225                                // Has child, Mixed with String.
226                                let fields = names.iter().map(|name| {
227                                    let typ = format_ident!(
228                                        "{}",
229                                        name.to_pascal_case(),
230                                        span = struct_name.span()
231                                    );
232                                    quote! {
233                                        #typ(::std::vec::Vec<#typ>)
234                                    }
235                                });
236                                let token = quote! {
237                                    #[derive(Clone, Debug)]
238                                    pub enum #struct_name {
239                                        PCDATA(::std::vec::Vec<String>),
240                                        #(#fields, )*
241                                    }
242                                };
243                                // println!(" > ZeroOrManyTimes: {}", struct_name);
244                                context.entry(struct_name).or_insert_with(|| {
245                                    tokens.push(token.clone());
246                                    token
247                                });
248                            }
249                            _ => unreachable!(),
250                        }
251                    }
252                    parser::ElementCategory::Children(children) => {
253                        let (ident, token_stream) =
254                            children.to_token_stream(&mut context, &struct_name);
255                        let token = quote! {
256                            pub type #struct_name = #ident;
257                        };
258                        context.entry(struct_name).or_insert_with(|| {
259                            tokens.push(token.clone());
260                            token
261                        });
262                        tokens.push(token_stream.clone());
263                    }
264                }
265            }
266            parser::ElementType::Entity(_entity) => {
267                //
268            }
269            parser::ElementType::Attlist(attlist) => {
270                let attrname = format_ident!("{}Attributes", attlist.name.to_pascal_case());
271                let mut names: Vec<Ident> = Vec::new();
272                let mut types: Vec<TokenStream2> = Vec::new();
273                attlist.attdefs.iter().for_each(|attdef| {
274                    let name = format_ident!("{}", attdef.name.to_pascal_case());
275                    let (typename, token_stream) = attdef.to_token_stream(&mut context, &name);
276                    let typename = match attdef.default_decl {
277                        parser::DefaultDecl::Implied => {
278                            quote! { ::std::option::Option<#typename> }
279                        }
280                        parser::DefaultDecl::Required => {
281                            quote! { #typename }
282                        }
283                        parser::DefaultDecl::Fixed(ref default)
284                        | parser::DefaultDecl::Default(ref default) => {
285                            let default = default.to_string();
286                            if !token_stream.is_empty() {
287                                tokens.push(quote! {
288                                    impl ::std::default::Default for #typename {
289                                        fn default() -> #typename {
290                                            use ::std::convert::TryFrom;
291                                            #typename::try_from(#default).unwrap()
292                                        }
293                                    }
294                                });
295                            }
296                            quote!( #typename )
297                        }
298                    };
299                    names.push(safe!(format_ident!("{}", name.to_string().to_snake_case())));
300                    types.push(typename);
301                    tokens.push(token_stream);
302                });
303                tokens.push(quote! {
304                    #[derive(Clone, Debug)]
305                    pub struct #attrname {
306                        #(pub #names: #types), *
307                    }
308                });
309            }
310            parser::ElementType::Comment(_) => {
311                //
312            }
313        }
314    }
315
316    /*
317    tokens
318        .iter()
319        .for_each(|token| println!("{}", token.to_string()));
320    */
321
322    TokenStream2::from_iter(tokens.into_iter()).into()
323}
324
325type Context = HashMap<Ident, TokenStream2>;
326
327trait ToTokenStream {
328    fn to_token_stream(&self, context: &mut Context, ident: &Ident) -> (Ident, TokenStream2);
329}
330
331impl<T> ToTokenStream for parser::Repeatable<T>
332where
333    T: ToTokenStream,
334{
335    fn to_token_stream(&self, context: &mut Context, ident: &Ident) -> (Ident, TokenStream2) {
336        match self {
337            parser::Repeatable::Once(once) => once.to_token_stream(context, ident),
338            parser::Repeatable::AtMostOnce(opt) => {
339                // opt
340                let (ident, mut token_stream) = opt.to_token_stream(context, ident);
341                let name = format_ident!("Opt{}", ident, span = ident.span());
342
343                let defined = context.get(&name).is_some();
344                if defined {
345                    // (ident, TokenStream2::new())
346                } else {
347                    let token = quote! {
348                        pub type #name = ::std::option::Option<#ident>;
349                    };
350
351                    context.insert(name.clone(), token.clone());
352
353                    token_stream.extend(token);
354                }
355
356                (name, token_stream)
357            }
358            parser::Repeatable::AtLeastOnce(more_than_zero) => {
359                let (ident, mut token_stream) = more_than_zero.to_token_stream(context, ident);
360                let name = format_ident!("NonEmpty{}", ident, span = ident.span());
361
362                let defined = context.get(&name).is_some();
363
364                if !defined {
365                    let token = quote! {
366                        pub type #name = ::std::vec::Vec<#ident>;
367                    };
368                    context.insert(name.clone(), token.clone());
369
370                    token_stream.extend(token);
371                } else {
372                    //
373                }
374                (name, token_stream)
375            }
376            parser::Repeatable::ZeroOrManyTimes(multi_or_empty) => {
377                let (ident, mut token_stream) = multi_or_empty.to_token_stream(context, ident);
378                let name = format_ident!(
379                    "{}",
380                    ident.to_string().to_table_case().to_pascal_case(),
381                    span = ident.span()
382                );
383
384                // println!("ZeroOrManyTimes -> {}", name);
385
386                let token = quote! {
387                    pub type #name = ::std::vec::Vec<#ident>;
388                };
389
390                let defined = context.get(&name).is_some();
391                if !defined {
392                    context.insert(name.clone(), token.clone());
393
394                    token_stream.extend(token);
395                } else {
396                    // TODO
397                }
398
399                (ident, token_stream)
400            }
401        }
402    }
403}
404
405impl ToTokenStream for parser::Child {
406    fn to_token_stream(&self, context: &mut Context, ident: &Ident) -> (Ident, TokenStream2) {
407        match self {
408            parser::Child::Name(name) => name.to_token_stream(context, ident),
409            parser::Child::Seq(seq) => seq.to_token_stream(context, ident),
410            parser::Child::Choices(choices) => choices.to_token_stream(context, ident),
411        }
412    }
413}
414
415impl<T> ToTokenStream for parser::Seq<T>
416where
417    T: ToTokenStream + ::std::fmt::Display,
418{
419    fn to_token_stream(&self, context: &mut Context, ident: &Ident) -> (Ident, TokenStream2) {
420        let (names, mut token_streams): (::std::vec::Vec<_>, ::std::vec::Vec<_>) = self
421            .iter()
422            .map(|c| c.to_token_stream(context, ident))
423            .unzip();
424
425        let mut names_iter = names.iter();
426        let mut ident = format_ident!("Tuple{}", names_iter.next().unwrap(), span = ident.span());
427        for name in names_iter {
428            ident = format_ident!("{}{}", ident, name, span = ident.span());
429        }
430        let fields = names
431            .iter()
432            .map(|name| safe!(format_ident!("{}", name.to_string().to_snake_case())));
433
434        let token = quote! {
435            #[derive(Clone, Debug)]
436            pub struct #ident {
437                #(pub #fields: #names), *
438            }
439        };
440        let defined = context.get(&ident).is_some();
441
442        if !defined {
443            context.insert(ident.clone(), token.clone());
444
445            token_streams.push(token);
446        } else {
447            // TODO: Ensure that's same.
448        }
449        (ident, TokenStream2::from_iter(token_streams.into_iter()))
450    }
451}
452
453impl<T> ToTokenStream for parser::Choices<T>
454where
455    T: ToTokenStream + ::std::fmt::Display,
456{
457    fn to_token_stream(&self, context: &mut Context, ident: &Ident) -> (Ident, TokenStream2) {
458        let ident = format_ident!("{}Choices", ident, span = ident.span());
459        let (names, mut token_streams): (Vec<_>, Vec<_>) = self
460            .iter()
461            .map(|c| c.to_token_stream(context, &ident))
462            .unzip();
463
464        let token = quote! {
465            #[derive(Clone, Debug)]
466            pub enum #ident {
467                #(#names(#names),)*
468            }
469        };
470
471        assert!(context.get(&ident).is_none());
472        context.insert(ident.clone(), token.clone());
473
474        token_streams.push(token);
475        (ident, TokenStream2::from_iter(token_streams.into_iter()))
476    }
477}
478
479impl ToTokenStream for parser::Name {
480    fn to_token_stream(&self, context: &mut Context, ident: &Ident) -> (Ident, TokenStream2) {
481        let ident = format_ident!("{}", self.as_str().to_pascal_case(), span = ident.span());
482
483        let defined = context.get(&ident).is_some();
484        if defined {
485            // println!(" --> {} already exists.", ident.to_string());
486            (ident, TokenStream2::new())
487        } else {
488            let token = quote! {
489                #[derive(Clone, Debug)]
490                pub struct #ident;
491            };
492            context.insert(ident.clone(), token.clone());
493            (ident, token.into())
494        }
495    }
496}
497
498/// attdefs: [
499///     AttDef {
500///         name: Name(
501///             "align",
502///         ),
503///         atttype: EnumeratedType(
504///             Enumeration(
505///                 Enumeration(
506///                     [
507///                         Nmtoken(
508///                             "left",
509///                         ),
510///                         Nmtoken(
511///                             "center",
512///                         ),
513///                         Nmtoken(
514///                             "right",
515///                         ),
516///                     ],
517///                 ),
518///             ),
519///         ),
520///         default_decl: Implied,
521///     },
522///     AttDef {
523///         name: Name(
524///             "width",
525///         ),
526///         atttype: StringType,
527///         default_decl: Implied,
528///     },
529/// ],
530impl ToTokenStream for parser::AttDef {
531    fn to_token_stream(&self, context: &mut Context, ident: &Ident) -> (Ident, TokenStream2) {
532        let mut tokens = Vec::new();
533        let (name, tokens) = match self.atttype {
534            parser::AttType::StringType => {
535                let ident = format_ident!("{}StringType", ident);
536                if context.get(&ident).is_none() {
537                    let token = quote! {
538                        #[derive(Clone, Debug)]
539                        pub struct #ident(pub String);
540                        impl ::std::convert::TryFrom<&str> for #ident {
541                            type Error = ::std::string::String;
542                            fn try_from(s: &str) -> Result<#ident, Self::Error> {
543                                ::std::result::Result::Ok(#ident(s.to_string()))
544                            }
545                        }
546                    };
547                    context.insert(ident.clone(), token.clone());
548                    tokens.push(token);
549                }
550                (ident, tokens)
551            }
552            parser::AttType::TokenizedType(ref _tokenized_type) => {
553                // FIXME: https://www.w3.org/TR/REC-xml/#NT-TokenizedType
554                let ident = format_ident!("{}TokenizedType", ident);
555                if context.get(&ident).is_none() {
556                    eprintln!(
557                        "TokenizedType `{}` has been implemented as `String`.",
558                        _tokenized_type
559                    );
560                    let token = quote! {
561                        #[derive(Clone, Debug)]
562                        pub struct #ident(pub String);
563                        impl ::std::convert::TryFrom<&str> for #ident {
564                            type Error = ::std::string::String;
565                            fn try_from(s: &str) -> Result<#ident, Self::Error> {
566                                ::std::result::Result::Ok(#ident(s.to_string()))
567                            }
568                        }
569                    };
570                    context.insert(ident.clone(), token.clone());
571                    tokens.push(token);
572                }
573                (ident, tokens)
574            }
575            parser::AttType::EnumeratedType(ref enumerated_type) => {
576                match enumerated_type {
577                    parser::EnumeratedType::NotationType(_notation_type) => {
578                        // FIXME: For compatibility, an attribute of type NOTATION MUST NOT be declared on an element declared EMPTY.
579                        unimplemented!("NotationType {}", _notation_type);
580                    }
581                    parser::EnumeratedType::Enumeration(enumeration) => {
582                        let name_types = format_ident!("{}EnumerationType", ident);
583                        let variants = enumeration
584                            .iter()
585                            .map(|e| format_ident!("{}", e.to_pascal_case()));
586                        let variants2 = variants.clone();
587                        let values = enumeration.iter().map(|v| {
588                            let v = v.as_str();
589                            quote! { #v }
590                        });
591                        let values2: Vec<&str> = enumeration.iter().map(|v| v.as_str()).collect();
592                        let values2 = values2.join(", ");
593                        if context.get(&name_types).is_none() {
594                            let token = quote! {
595                                #[derive(Clone, Debug)]
596                                pub enum #name_types {
597                                    #(#variants, )*
598                                }
599                                impl ::std::convert::TryFrom<&str> for #name_types {
600                                    type Error = ::std::string::String;
601                                    fn try_from(s: &str) -> Result<#name_types, Self::Error> {
602                                        match s {
603                                            #(#values => ::std::result::Result::Ok(#name_types::#variants2),)*
604                                            _ => ::std::result::Result::Err(format!("value must be one of `{}`, but not `{}`.", #values2, s))
605                                        }
606                                    }
607                                }
608                            };
609                            context.insert(name_types.clone(), token.clone());
610                            tokens.push(token);
611                        } else {
612                        }
613                        (name_types, tokens)
614                    }
615                }
616            }
617        };
618        (name, TokenStream2::from_iter(tokens.into_iter()))
619    }
620}