iso_macro/
lib.rs

1//
2// iso - implementations of data types related to common iso standards
3// Copyright (c) 2021 superwhiskers <whiskerdev@protonmail.com>
4//
5// This Source Code Form is subject to the terms of the Mozilla Public
6// License, v. 2.0. If a copy of the MPL was not distributed with this
7// file, You can obtain one at https://mozilla.org/MPL/2.0/.
8
9//
10
11#![allow(clippy::cognitive_complexity)]
12#![warn(clippy::cargo_common_metadata)]
13#![warn(clippy::dbg_macro)]
14#![warn(clippy::explicit_deref_methods)]
15#![warn(clippy::filetype_is_file)]
16#![warn(clippy::imprecise_flops)]
17#![warn(clippy::large_stack_arrays)]
18#![warn(clippy::todo)]
19#![warn(clippy::unimplemented)]
20#![deny(clippy::await_holding_lock)]
21#![deny(clippy::cast_lossless)]
22#![deny(clippy::clone_on_ref_ptr)]
23#![deny(clippy::doc_markdown)]
24#![deny(clippy::empty_enum)]
25#![deny(clippy::enum_glob_use)]
26#![deny(clippy::exit)]
27#![deny(clippy::explicit_into_iter_loop)]
28#![deny(clippy::explicit_iter_loop)]
29#![deny(clippy::fallible_impl_from)]
30#![deny(clippy::inefficient_to_string)]
31#![deny(clippy::large_digit_groups)]
32#![deny(clippy::wildcard_dependencies)]
33#![deny(clippy::wildcard_imports)]
34#![deny(clippy::unused_self)]
35#![deny(clippy::single_match_else)]
36#![deny(clippy::option_option)]
37#![deny(clippy::mut_mut)]
38#![feature(proc_macro_diagnostic)]
39
40use proc_macro::{Diagnostic, Level, TokenStream};
41use proc_macro2::{Literal, Span, TokenStream as TokenStream2};
42use quote::{quote, ToTokens};
43use serde::Deserialize;
44use std::{
45    collections::HashMap,
46    convert::{TryFrom, TryInto},
47    env::var,
48    fmt::Debug,
49    fs::File,
50    io::{BufRead, BufReader},
51    path::{Path, PathBuf},
52    result::Result as StdResult,
53};
54use syn::{
55    parse::{Parse, ParseStream, Result},
56    parse_macro_input, Ident, LitStr, Token,
57};
58
59//TODO(superwhiskers):
60//   - refactor the source code to not be so repetitive
61//   - give proper diagnostics and handle errors well (no `.unwrap()`)
62//   - add documentation comments
63
/// A structure representing ISO country code entries
///
/// Instances are deserialized from the country code dataset. Because of the
/// `kebab-case` renaming, the fields below are read from the `name`,
/// `alpha-2`, `alpha-3` and `country-code` keys respectively.
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
struct CountryEntry {
    /// The name of the country
    name: String,
    /// The alpha-2 code of the country
    alpha_2: String,
    /// The alpha-3 code of the country
    alpha_3: String,
    /// The numeric code of the country, kept as a string (it is parsed as a
    /// `u16` when a numeric literal is generated from it)
    country_code: String,
}
73
/// An enumeration over the supported ISO language code formats as well as the name of the language
#[derive(PartialEq, Eq, Hash, Clone)]
enum LanguageTableEntryKey {
    /// The ISO 639-3 code column
    Iso639_3,
    /// The ISO 639-2 bibliographic code column
    Iso639_2b,
    /// The ISO 639-2 terminologic code column
    Iso639_2t,
    /// The ISO 639-1 code column
    Iso639_1,
    /// The language name column
    Name,
}

impl LanguageTableEntryKey {
    /// Returns the designation of the ISO standard this key stands for, or
    /// `None` for [`LanguageTableEntryKey::Name`], which has no standard
    fn as_standard_code(&self) -> Option<&'static str> {
        match self {
            Self::Iso639_3 => Some("639-3"),
            Self::Iso639_2b => Some("639-2b"),
            Self::Iso639_2t => Some("639-2t"),
            Self::Iso639_1 => Some("639-1"),
            Self::Name => None,
        }
    }
}

impl TryFrom<String> for LanguageTableEntryKey {
    type Error = &'static str;

    /// Looks up the key named by `string`, ignoring case
    fn try_from(string: String) -> StdResult<Self, Self::Error> {
        let normalized = string.to_lowercase();
        match normalized.as_str() {
            "iso639_3" => Ok(Self::Iso639_3),
            "iso639_2b" => Ok(Self::Iso639_2b),
            "iso639_2t" => Ok(Self::Iso639_2t),
            "iso639_1" => Ok(Self::Iso639_1),
            "name" => Ok(Self::Name),
            _ => Err("unable to find a matching variant"),
        }
    }
}

impl TryInto<&'static str> for LanguageTableEntryKey {
    type Error = &'static str;

    /// Returns the name of the enumeration generated for this key; the
    /// language name has no such enumeration and yields an error
    fn try_into(self) -> StdResult<&'static str, Self::Error> {
        match self {
            Self::Iso639_3 => Ok("Iso639_3"),
            Self::Iso639_2b => Ok("Iso639_2b"),
            Self::Iso639_2t => Ok("Iso639_2t"),
            Self::Iso639_1 => Ok("Iso639_1"),
            Self::Name => Err("unable to find a matching string"),
        }
    }
}
124
/// An enumeration over the supported ISO country code formats as well as the name of the country
#[derive(PartialEq, Eq, Hash, Clone)]
enum CountryIdentifierKey {
    /// The ISO 3166-1 alpha-2 code
    Alpha2,
    /// The ISO 3166-1 alpha-3 code
    Alpha3,
    /// The ISO 3166-1 numeric code
    Numeric,
    /// The country name
    Name,
}

impl CountryIdentifierKey {
    /// Returns the designation of the ISO standard this key stands for, or
    /// `None` for [`CountryIdentifierKey::Name`], which has no standard
    fn as_standard_code(&self) -> Option<&'static str> {
        match self {
            Self::Alpha2 => Some("3166-1 alpha-2"),
            Self::Alpha3 => Some("3166-1 alpha-3"),
            Self::Numeric => Some("3166-1 numeric"),
            Self::Name => None,
        }
    }
}

impl TryFrom<String> for CountryIdentifierKey {
    type Error = &'static str;

    /// Looks up the key named by `string`, ignoring case
    fn try_from(string: String) -> StdResult<Self, Self::Error> {
        let normalized = string.to_lowercase();
        match normalized.as_str() {
            "iso3166_1_alpha_2" => Ok(Self::Alpha2),
            "iso3166_1_alpha_3" => Ok(Self::Alpha3),
            "iso3166_1_numeric" => Ok(Self::Numeric),
            "name" => Ok(Self::Name),
            _ => Err("unable to find a matching variant"),
        }
    }
}

impl TryInto<&'static str> for CountryIdentifierKey {
    type Error = &'static str;

    /// Returns the name of the enumeration generated for this key; only the
    /// alpha-2 and alpha-3 keys have one, every other key yields an error
    fn try_into(self) -> StdResult<&'static str, Self::Error> {
        match self {
            Self::Alpha2 => Ok("Iso3166_1_alpha_2"),
            Self::Alpha3 => Ok("Iso3166_1_alpha_3"),
            Self::Numeric | Self::Name => Err("unable to find a matching string"),
        }
    }
}
169
170fn parse_country_codes(dataset: &Path) -> Option<Vec<CountryEntry>> {
171    let country_reader = BufReader::new(match File::open(dataset) {
172        Ok(file) => file,
173        Err(e) => {
174            Diagnostic::new(
175                Level::Error,
176                format!(
177                    "Unable to load the country code dataset, {}",
178                    dataset.as_os_str().to_string_lossy()
179                ),
180            )
181            .note(format!("{}", e))
182            .emit();
183            return None;
184        }
185    });
186
187    Some(match serde_json::from_reader(country_reader) {
188        Ok(parsed) => parsed,
189        Err(e) => {
190            Diagnostic::new(
191                Level::Error,
192                format!(
193                    "Unable to parse the country code dataset, {}",
194                    dataset.as_os_str().to_string_lossy()
195                ),
196            )
197            .note(format!("{}", e))
198            .emit();
199            return None;
200        }
201    })
202}
203
204fn parse_language_table(table: &Path) -> Option<Vec<HashMap<LanguageTableEntryKey, String>>> {
205    let table_reader = BufReader::new(match File::open(table) {
206        Ok(file) => file,
207        Err(e) => {
208            Diagnostic::new(
209                Level::Error,
210                format!(
211                    "Unable to load the language table, {}",
212                    table.as_os_str().to_string_lossy()
213                ),
214            )
215            .note(format!("{}", e))
216            .emit();
217            return None;
218        }
219    });
220
221    Some(
222        table_reader
223            .lines()
224            .skip(1)
225            .filter_map(|raw_line| {
226                let line = match &raw_line {
227                    Ok(s) => s,
228                    Err(_) => return None,
229                }
230                .split('\t')
231                .collect::<Vec<&str>>();
232
233                let mut entry = HashMap::new();
234                entry.insert(LanguageTableEntryKey::Iso639_3, line[0].to_string());
235                if line[1].len() == 3 {
236                    entry.insert(LanguageTableEntryKey::Iso639_2b, line[1].to_string());
237                }
238                if line[2].len() == 3 {
239                    entry.insert(LanguageTableEntryKey::Iso639_2t, line[2].to_string());
240                }
241                if line[3].len() == 2 {
242                    entry.insert(LanguageTableEntryKey::Iso639_1, line[3].to_string());
243                }
244                entry.insert(LanguageTableEntryKey::Name, line[6].to_string());
245
246                Some(entry)
247            })
248            .collect(),
249    )
250}
251
252fn parse_language_table_from_environment() -> Option<Vec<HashMap<LanguageTableEntryKey, String>>> {
253    let mut language_table_path = PathBuf::from(var("CARGO_MANIFEST_DIR").unwrap());
254    language_table_path.push("assets/language.tab");
255    parse_language_table(&language_table_path)
256}
257
258fn parse_country_codes_from_environment() -> Option<Vec<CountryEntry>> {
259    let mut country_codes_path = PathBuf::from(var("CARGO_MANIFEST_DIR").unwrap());
260    country_codes_path.push("assets/country.json");
261    parse_country_codes(&country_codes_path)
262}
263
/// The parsed arguments of the table-driven generation macros
///
/// Note: the second element of each tuple is `true` if a string or integer is being worked with
/// (the key was written as a string literal) and `false` if the key was written as an identifier
struct GenerationInput<K>
where
    K: TryFrom<String>,
    K::Error: Debug,
{
    /// The name given after the `enum` keyword, if the input started with `enum <name>:`
    enumeration: Option<String>,
    /// The tokens to match on (`&self` or an identifier), if the input started with `match <target>:`
    match_against: Option<TokenStream2>,
    /// The left-hand key and whether it was written as a string literal
    lhs: (K, bool),
    /// The key following `=>`, if any, and whether it was written as a string literal
    rhs: Option<(K, bool)>,
}
275
276impl<K> Parse for GenerationInput<K>
277where
278    K: TryFrom<String>,
279    K::Error: Debug,
280{
281    fn parse(input: ParseStream) -> Result<Self> {
282        let keyword = input.lookahead1();
283        let enumeration = if keyword.peek(Token![enum]) {
284            input.parse::<Token![enum]>()?;
285            let enumeration_name = input.parse::<Ident>()?.to_string();
286            input.parse::<Token![:]>()?;
287            Some(enumeration_name)
288        } else {
289            None
290        };
291        let match_against = if keyword.peek(Token![match]) {
292            input.parse::<Token![match]>()?;
293            let match_against = input.lookahead1();
294            let match_against = if match_against.peek(Token![&]) {
295                input.parse::<Token![&]>()?;
296                input.parse::<Token![self]>()?;
297                Some(quote! { &self })
298            } else if match_against.peek(Ident) {
299                Some(input.parse::<Ident>()?.to_token_stream())
300            } else {
301                None
302            };
303            input.parse::<Token![:]>()?;
304            match_against
305        } else {
306            None
307        };
308        let lhs = input.lookahead1();
309        let lhs = if lhs.peek(Ident) {
310            (
311                input.parse::<Ident>()?.to_string().try_into().unwrap(),
312                false,
313            )
314        } else if lhs.peek(LitStr) {
315            (input.parse::<LitStr>()?.value().try_into().unwrap(), true)
316        } else {
317            return Err(lhs.error());
318        };
319        let token = input.lookahead1();
320        let rhs = if token.peek(Token![=>]) {
321            input.parse::<Token![=>]>()?;
322            let rhs = input.lookahead1();
323            Some(if rhs.peek(Ident) {
324                (
325                    input.parse::<Ident>()?.to_string().try_into().unwrap(),
326                    false,
327                )
328            } else if rhs.peek(LitStr) {
329                (input.parse::<LitStr>()?.value().try_into().unwrap(), true)
330            } else {
331                return Err(rhs.error());
332            })
333        } else {
334            None
335        };
336
337        Ok(GenerationInput {
338            enumeration,
339            match_against,
340            lhs,
341            rhs,
342        })
343    }
344}
345
/// Rewrites `string` in place so that its first byte is ASCII-uppercased and
/// everything after it is ASCII-lowercased
///
/// The string is left untouched when it is empty or when its first character
/// is wider than one byte, as it cannot be split after the first byte then.
fn ascii_formatter(string: &mut str) {
    if !string.is_char_boundary(1) {
        return;
    }

    let (head, tail) = string.split_at_mut(1);
    head.make_ascii_uppercase();
    tail.make_ascii_lowercase();
}
354
355#[proc_macro]
356pub fn country_identifiers_from_table(tokens: TokenStream) -> TokenStream {
357    let country_codes = parse_country_codes_from_environment().unwrap();
358    let GenerationInput {
359        enumeration,
360        match_against,
361        lhs,
362        rhs,
363    } = parse_macro_input!(tokens as GenerationInput<CountryIdentifierKey>);
364
365    let mut rows: Vec<proc_macro2::TokenStream> = Vec::new();
366    for codes in country_codes {
367        match (&lhs, &rhs) {
368            ((lhs_key, true), None) => {
369                let lhs = Literal::string(match &lhs_key {
370                    CountryIdentifierKey::Alpha2 => &codes.alpha_2,
371                    CountryIdentifierKey::Alpha3 => &codes.alpha_3,
372                    CountryIdentifierKey::Numeric => {
373                        panic!("numeric identifiers cannot be used alone")
374                    }
375                    CountryIdentifierKey::Name => panic!("names cannot be used alone"),
376                });
377                rows.push(quote! {
378                    #lhs
379                });
380            }
381            ((lhs_key, false), None) => {
382                let mut lhs_string = match &lhs_key {
383                    CountryIdentifierKey::Alpha2 => codes.alpha_2,
384                    CountryIdentifierKey::Alpha3 => codes.alpha_3,
385                    CountryIdentifierKey::Numeric => {
386                        panic!("numeric identifiers cannot be used as an identifier")
387                    }
388                    CountryIdentifierKey::Name => panic!("names cannot be used as an identifier"),
389                };
390                ascii_formatter(&mut lhs_string);
391                let lhs = Ident::new(&lhs_string, Span::call_site());
392                rows.push(quote! {
393                    #lhs
394                });
395            }
396            ((lhs_key, true), Some((rhs_key, true))) => {
397                let lhs = match &lhs_key {
398                    CountryIdentifierKey::Alpha2 => Literal::string(&codes.alpha_2),
399                    CountryIdentifierKey::Alpha3 => Literal::string(&codes.alpha_3),
400                    CountryIdentifierKey::Numeric => Literal::u16_unsuffixed(codes.country_code.parse().unwrap()),
401                    CountryIdentifierKey::Name => Literal::string(&codes.name),
402                };
403                let rhs = match &rhs_key {
404                    CountryIdentifierKey::Alpha2 => Literal::string(&codes.alpha_2),
405                    CountryIdentifierKey::Alpha3 => Literal::string(&codes.alpha_3),
406                    CountryIdentifierKey::Numeric => Literal::u16_unsuffixed(codes.country_code.parse().unwrap()),
407                    CountryIdentifierKey::Name => Literal::string(&codes.name),
408                };
409                rows.push(quote! {
410                    #lhs => #rhs
411                });
412            }
413            ((lhs_key, false), Some((rhs_key, true))) => {
414                let mut lhs_string = match &lhs_key {
415                    CountryIdentifierKey::Alpha2 => codes.alpha_2.clone(),
416                    CountryIdentifierKey::Alpha3 => codes.alpha_3.clone(),
417                    CountryIdentifierKey::Numeric => {
418                        panic!("numeric identifiers cannot be used as an identifier")
419                    }
420                    CountryIdentifierKey::Name => panic!("names cannot be used as an identifier"),
421                };
422                ascii_formatter(&mut lhs_string);
423                let lhs = Ident::new(&lhs_string, Span::call_site());
424                let lhs_path = Ident::new(lhs_key.clone().try_into().unwrap(), Span::call_site());
425                let rhs = match &rhs_key {
426                    CountryIdentifierKey::Alpha2 => Literal::string(&codes.alpha_2),
427                    CountryIdentifierKey::Alpha3 => Literal::string(&codes.alpha_3),
428                    CountryIdentifierKey::Numeric => Literal::u16_unsuffixed(codes.country_code.parse().unwrap()),
429                    CountryIdentifierKey::Name => Literal::string(&codes.name),
430                };
431                rows.push(quote! {
432                    #lhs_path::#lhs => #rhs
433                });
434            }
435            ((lhs_key, true), Some((rhs_key, false))) => {
436                let lhs = match &lhs_key {
437                    CountryIdentifierKey::Alpha2 => Literal::string(&codes.alpha_2),
438                    CountryIdentifierKey::Alpha3 => Literal::string(&codes.alpha_3),
439                    CountryIdentifierKey::Numeric => Literal::u16_unsuffixed(codes.country_code.parse().unwrap()),
440                    CountryIdentifierKey::Name => Literal::string(&codes.name),
441                };
442                let mut rhs_string = match &rhs_key {
443                    CountryIdentifierKey::Alpha2 => codes.alpha_2.clone(),
444                    CountryIdentifierKey::Alpha3 => codes.alpha_3.clone(),
445                    CountryIdentifierKey::Numeric => {
446                        panic!("numeric identifiers cannot be used as an identifier")
447                    }
448                    CountryIdentifierKey::Name => panic!("names cannot be used as an identifier"),
449                };
450                ascii_formatter(&mut rhs_string);
451                let rhs = Ident::new(&rhs_string, Span::call_site());
452                let rhs_path = Ident::new(rhs_key.clone().try_into().unwrap(), Span::call_site());
453                rows.push(quote! {
454                    #lhs => Some(#rhs_path::#rhs)
455                });
456            }
457            ((lhs_key, false), Some((rhs_key, false))) => {
458                let mut lhs_string = match &lhs_key {
459                    CountryIdentifierKey::Alpha2 => codes.alpha_2.clone(),
460                    CountryIdentifierKey::Alpha3 => codes.alpha_3.clone(),
461                    CountryIdentifierKey::Numeric => {
462                        panic!("numeric identifiers cannot be used as an identifier")
463                    }
464                    CountryIdentifierKey::Name => panic!("names cannot be used as an identifier"),
465                };
466                ascii_formatter(&mut lhs_string);
467                let lhs = Ident::new(&lhs_string, Span::call_site());
468                let lhs_path = Ident::new(lhs_key.clone().try_into().unwrap(), Span::call_site());
469                let mut rhs_string = match &rhs_key {
470                    CountryIdentifierKey::Alpha2 => codes.alpha_2.clone(),
471                    CountryIdentifierKey::Alpha3 => codes.alpha_3.clone(),
472                    CountryIdentifierKey::Numeric => {
473                        panic!("numeric identifiers cannot be used as an identifier")
474                    }
475                    CountryIdentifierKey::Name => panic!("names cannot be used as an identifier"),
476                };
477                ascii_formatter(&mut rhs_string);
478                let rhs = Ident::new(&rhs_string, Span::call_site());
479                let rhs_path = Ident::new(rhs_key.clone().try_into().unwrap(), Span::call_site());
480
481                // we don't need optionals here because there's always an alpha3 and numeric code for every alpha2 and friends
482                rows.push(quote! {
483                    #lhs_path::#lhs => #rhs_path::#rhs
484                });
485            }
486        }
487    }
488
489    return TokenStream::from(if let Some(enumeration_name) = enumeration {
490        let enumeration_name = Ident::new(&enumeration_name, Span::call_site());
491        let iso_code = lhs.0.as_standard_code();
492        if let Some(iso_code) = iso_code {
493            quote! {
494                /// Enumeration over all possible ISO
495                #[doc = #iso_code]
496                /// country codes
497                #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
498                #[cfg_attr(feature = "serde", serde(rename_all = "UPPERCASE"))]
499                #[derive(Debug, Hash, Copy, Clone, Eq, PartialEq)]
500                pub enum #enumeration_name {
501                    #(#rows),*
502                }
503            }
504        } else {
505            quote! {
506                compile_error!("the selected key to generate an enumeration from does not have a corresponding iso standard")
507            }
508        }
509    } else if let Some(match_against) = match_against {
510        if lhs.1 {
511            quote! {
512                match #match_against {
513                    #(#rows),*,
514                    _ => None,
515                }
516            }
517        } else {
518            quote! {
519                match #match_against {
520                    #(#rows),*
521                }
522            }
523        }
524    } else {
525        quote! {
526            compile_error!("not enough information was provided");
527        }
528    });
529}
530
531#[proc_macro]
532pub fn language_identifiers_from_table(tokens: TokenStream) -> TokenStream {
533    let table = parse_language_table_from_environment().unwrap();
534    let GenerationInput {
535        enumeration,
536        match_against,
537        lhs,
538        rhs,
539    } = parse_macro_input!(tokens as GenerationInput<LanguageTableEntryKey>);
540
541    let mut rows: Vec<proc_macro2::TokenStream> = Vec::new();
542    for table_entry in table {
543        if table_entry.get(&lhs.0).is_none() {
544            continue;
545        }
546        match (&lhs, &rhs) {
547            ((lhs_table, true), None) => {
548                let lhs = Literal::string(&table_entry[lhs_table]);
549                rows.push(quote! {
550                    #lhs
551                });
552            }
553            ((lhs_table, false), None) => {
554                let mut lhs_string = table_entry[lhs_table].clone();
555                ascii_formatter(&mut lhs_string);
556                let lhs: Ident = Ident::new(&lhs_string, Span::call_site());
557                rows.push(quote! {
558                    #lhs
559                });
560            }
561            ((lhs_table, true), Some((rhs_table, true))) => {
562                let lhs = Literal::string(&table_entry[lhs_table]);
563                let rhs = Literal::string(&table_entry[rhs_table]);
564                rows.push(quote! {
565                    #lhs => #rhs
566                });
567            }
568            ((lhs_table, false), Some((rhs_table, true))) => {
569                let mut lhs_string = table_entry[lhs_table].clone();
570                ascii_formatter(&mut lhs_string);
571                let lhs = Ident::new(&lhs_string, Span::call_site());
572
573                // while this technically isn't safe, trying to generate a literal for a name is impossible
574                let lhs_path = Ident::new(lhs_table.clone().try_into().unwrap(), Span::call_site());
575
576                let rhs = Literal::string(&table_entry[rhs_table]);
577                rows.push(quote! {
578                    #lhs_path::#lhs => #rhs
579                })
580            }
581            ((lhs_table, true), Some((rhs_table, false))) => {
582                let lhs = Literal::string(&table_entry[lhs_table]);
583                if let Some(rhs) = table_entry.get(rhs_table) {
584                    let mut rhs_string = rhs.clone();
585                    ascii_formatter(&mut rhs_string);
586                    let rhs = Ident::new(&rhs_string, Span::call_site());
587
588                    // while this technically isn't safe, trying to generate a literal for a name is impossible
589                    let rhs_path =
590                        Ident::new(rhs_table.clone().try_into().unwrap(), Span::call_site());
591                    rows.push(quote! {
592                        #lhs => Some(#rhs_path::#rhs)
593                    })
594                } else {
595                    rows.push(quote! {
596                        #lhs => None
597                    })
598                }
599            }
600            ((lhs_table, false), Some((rhs_table, false))) => {
601                let mut lhs_string = table_entry[lhs_table].clone();
602                ascii_formatter(&mut lhs_string);
603                let lhs = Ident::new(&lhs_string, Span::call_site());
604
605                // while this technically isn't safe, trying to generate a literal for a name is impossible
606                let lhs_path = Ident::new(lhs_table.clone().try_into().unwrap(), Span::call_site());
607                if let Some(rhs) = table_entry.get(rhs_table) {
608                    let mut rhs_string = rhs.clone();
609                    ascii_formatter(&mut rhs_string);
610                    let rhs = Ident::new(&rhs_string, Span::call_site());
611
612                    // while this technically isn't safe, trying to generate a literal for a name is impossible
613                    let rhs_path =
614                        Ident::new(rhs_table.clone().try_into().unwrap(), Span::call_site());
615                    rows.push(quote! {
616                        #lhs_path::#lhs => Some(#rhs_path::#rhs)
617                    })
618                } else {
619                    rows.push(quote! {
620                        #lhs_path::#lhs => None
621                    })
622                }
623            }
624        }
625    }
626
627    return TokenStream::from(if let Some(enumeration_name) = enumeration {
628        let enumeration_name = Ident::new(&enumeration_name, Span::call_site());
629        let iso_code = lhs.0.as_standard_code();
630        if let Some(iso_code) = iso_code {
631            quote! {
632                /// Enumeration over all possible ISO
633                #[doc = #iso_code]
634                /// language codes
635                #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
636                #[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))]
637                #[derive(Debug, Hash, Copy, Clone, Eq, PartialEq)]
638                pub enum #enumeration_name {
639                    #(#rows),*
640                }
641            }
642        } else {
643            quote! {
644                compile_error!("the selected table column to generate an enumeration from does not have a corresponding iso standard")
645            }
646        }
647    } else if let Some(match_against) = match_against {
648        if lhs.1 {
649            quote! {
650                match #match_against {
651                    #(#rows),*,
652                    _ => None,
653                }
654            }
655        } else {
656            quote! {
657                match #match_against {
658                    #(#rows),*
659                }
660            }
661        }
662    } else {
663        quote! {
664            compile_error!("not enough information was provided");
665        }
666    });
667}