utem/lib.rs
1// lib.rs
2// UTEM: Universal Text Encoding as Meaning
3// Copyright 2017 (c) Aldaron's Tech
4// Copyright 2017 (c) Jeron Lau
5// Licensed under the MIT LICENSE
6
7#![doc(
8 html_logo_url =
9 "https://rawgit.com/aldarons-tech/utem/master/res/icon.svg",
10 html_favicon_url =
11 "https://rawgit.com/aldarons-tech/utem/master/res/symbol.svg",
12 html_root_url = "http://at.plopgrizzly.tech/utem/"
13)]
14
15mod dictionary;
16
17mod english;
18
/// Languages that UTEM text can be converted to or from.
///
/// The enum carries no data, so it is cheap to copy, compare and print.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// The language that the system uses.
    Default,
    /// Intermediate representation between languages.
    RanSlat,
    /// English.
    English,
    /// Spanish.
    Spanish,
}
27
28// const END: u8 = 0b_0000;
29// const NOUN: u8 = 0b_0001;
30// const VERB: u8 = 0b_0010;
31// const ADJECTIVE: u8 = 0b_0011;
32// const ARTICLE: u8 = 0b_0100;
33
34// const END_TEXT: u8 = END & (0b_0000 << 4);
35// const END_PARAGRAPH: u8 = END & (0b_0001 << 4);
36// const END_SENTENCE: u8 = END & (0b_0010 << 4);
37// const END_CLAUSE: u8 = END & (0b_0011 << 4);
38
39// const NOUN_UNDERIVED: u8 = NOUN & (0b_0000 << 4);
40// const NOUN_VERB: u8 = NOUN & (0b_0001 << 4); // A Noun Form of A Verb ( *Hiking* is fun from *to hike* )
41// const NOUN_ADJECTIVE: u8 = NOUN & (0b_0010 << 4); // A Noun Form of An Adjective ( *Red* is a great color from *red* )
42
43// const VERB_INFINITIVE: u8 = VERB & (0b_0000 << 4);
44// const VERB_: u8 = VERB & (0b_0001 << 4);
45
46// const ADJECTIVE_ADNOUN: u8 = ADJECTIVE & (0b_0000 << 4);
47// const ADJECTIVE_ADVERB: u8 = ADJECTIVE & (0b_0001 << 4);
48
49// const ARTICLE_INDEFINITE: u8 = ARTICLE & (0b_0000 << 4); // A, An, Some
50// const ARTICLE_DEFINITE: u8 = ARTICLE & (0b_0001 << 4); // The
51
// Independent clause (IC) types: terminator codes stored in the low three
// bits of a clause header byte.

/// IC ending in .
const IC_STATE: u8 = 0b000;
/// IC ending in ?
const IC_ASK: u8 = 0b001;
/// IC ending in !
const IC_EXCLAIM: u8 = 0b010;
/// IC ending in ‽
const IC_SURPRISE: u8 = 0b011;
/// IC ending in !?
const IC_EXCLASK: u8 = 0b100;
/// IC ending in ?!
const IC_ASKEXCL: u8 = 0b101;
/// IC ending in ....
const IC_TRAIL: u8 = 0b110;
/// Next codepoint tells the punctuation.
const IC_OTHER: u8 = 0b111;

/// IC ending in no punctuation (flag held in bit 3, above the terminator).
const IC_N: u8 = 0b1000;
63// const IC_NASK: u8 = 0b_0001; // IC ending in ?s, number defined by next codepoint
64// const IC_NEXCLAIM: u8 = 0b_0010; // IC ending in !s, number defined by next codepoint
65// const IC_NSURPRISE: u8 = 0b_0011; // IC ending in ‽s, number defined by next codepoint
66// const IC_NEXCLASK: u8 = 0b_0100; // IC ending in !?s, number defined by next codepoint
67// const IC_NASKEXCL: u8 = 0b_0101; // IC ending in ?!s, number defined by next codepoint
68// const IC_NTRAIL: u8 = 0b_0110; // IC ending in ...'s, number defined by next codepoint
69// const IC_NOTHER: u8 = 0b_0111; // number defined by next codepoint, codepoint after next tells The punctuation to repeat,
70
// Clause container: the top two bits (6-7) of a clause header byte.

/// Clause with no surrounding container.
const IC_C_NORM: u8 = 0b_00 << 6;
/// Quoted clause.
const IC_C_QUOTE: u8 = 0b_01 << 6;
/// Parenthetical clause.
const IC_C_PARENTHETICAL: u8 = 0b_10 << 6;
/// Other forms.
const IC_C_OTHER: u8 = 0b_11 << 6;

// Clause kind: bits 4-5 of a clause header byte.

/// Complete sentence.
const IC_SENT: u8 = 0b_00 << 4;
/// A noun phrase.
const IC_WORD: u8 = 0b_01 << 4;
/// Cut off sentence.
const IC_COFF: u8 = 0b_10 << 4;
79// const IC_SENT_QUOTE: u8 = IC_C_QUOTE & IC_SENT; // Complete sentence
80// const IC_WORD_QUOTE: u8 = IC_C_QUOTE & IC_WORD; // A noun phrase
81// const IC_COFF_QUOTE: u8 = IC_C_QUOTE & IC_COFF; // Cut off sentence
82// const IC_SENT_PARENTHETICAL: u8 = IC_C_PARENTHETICAL & IC_SENT; // Complete sentence
83// const IC_WORD_PARENTHETICAL: u8 = IC_C_PARENTHETICAL & IC_WORD; // A noun phrase
84// const IC_COFF_PARENTHETICAL: u8 = IC_C_PARENTHETICAL & IC_COFF; // Cut off sentence
85
/// Take the value out of `a`.
///
/// # Panics
///
/// Panics with "Unexpected end of UTEM text." when `a` is `None`.
fn unwrap<T>(a: Option<T>) -> T {
    a.unwrap_or_else(|| panic!("Unexpected end of UTEM text."))
}
90
91/// Decode UTEM as UTF8 using the specified language.
92pub fn decode(_/*language*/: Language, utem_text: &[u8]) -> String {
93 let mut text_it = utem_text.iter();
94 let mut string = String::new();
95
96 loop {
97 let ic_type = unwrap(text_it.next());
98
99 match ic_type & 0b_11_00_0000 {
100 IC_C_NORM => string.push('x'),
101 IC_C_QUOTE => string.push('"'),
102 IC_C_PARENTHETICAL => string.push('('),
103 IC_C_OTHER => string.push('_'), // placeholder
104 _ => unreachable!(),
105 }
106 match ic_type & 0b_00_11_0000 {
107 IC_SENT => string.push(';'),
108 IC_WORD => string.push('w'),
109 IC_COFF => string.push('-'),
110 _ => panic!("invalid UTEM"),
111 }
112 match ic_type & 0b_00_00_1000 {
113 IC_N => string.push('#'),
114 _ => string.push('1'),
115 }
116 match ic_type & 0b_00_00_0111 {
117 IC_STATE => string.push('.'),
118 IC_ASK => string.push('?'),
119 IC_EXCLAIM => string.push('!'),
120 IC_SURPRISE => string.push('‽'),
121 IC_EXCLASK => string.push('/'), // !?
122 IC_ASKEXCL => string.push('\\'), // ?!
123 IC_TRAIL => string.push('…'), // ...
124 IC_OTHER => string.push('_'), // placeholder
125 _ => unreachable!(),
126 }
127 }
128}
129
130/// Encode UTF-8 Text as UTEM Text
131pub fn encode(language: Language, utf8_text: &str) -> Vec<u8> {
132 match language {
133 Language::English => english::encode(utf8_text),
134 _ => panic!("Not Supported Yet")
135 }
136}
137
138/// Translate ran-slat into a language.
139pub fn translate(language: Language, ranslat: &str) -> String {
140 match language {
141 Language::English => english::translate(ranslat),
142 _ => panic!("Language not supported yet")
143 }
144}