utem/
lib.rs

1// lib.rs
2// UTEM: Universal Text Encoding as Meaning
3// Copyright 2017 (c) Aldaron's Tech
4// Copyright 2017 (c) Jeron Lau
5// Licensed under the MIT LICENSE
6
7#![doc(
8	html_logo_url =
9		"https://rawgit.com/aldarons-tech/utem/master/res/icon.svg",
10	html_favicon_url =
11		"https://rawgit.com/aldarons-tech/utem/master/res/symbol.svg",
12	html_root_url = "http://at.plopgrizzly.tech/utem/"
13)]
14
15mod dictionary;
16
17mod english;
18
/// Languages that UTEM text can be converted to or from.
///
/// At present `encode` and `translate` only accept `Language::English` and
/// panic for every other variant; `decode` ignores its language argument.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
	/// The language that the system uses.
	Default,
	/// Intermediate representation between languages.
	RanSlat,
	/// English.
	English,
	/// Spanish.
	Spanish,
}
27
28// const END: u8 = 0b_0000;
29// const NOUN: u8 = 0b_0001;
30// const VERB: u8 = 0b_0010;
31// const ADJECTIVE: u8 = 0b_0011;
32// const ARTICLE: u8 = 0b_0100;
33
// const END_TEXT: u8 = END | (0b_0000 << 4);
// const END_PARAGRAPH: u8 = END | (0b_0001 << 4);
// const END_SENTENCE: u8 = END | (0b_0010 << 4);
// const END_CLAUSE: u8 = END | (0b_0011 << 4);

// const NOUN_UNDERIVED: u8 = NOUN | (0b_0000 << 4);
// const NOUN_VERB: u8 = NOUN | (0b_0001 << 4); // A Noun Form of A Verb ( *Hiking* is fun from *to hike* )
// const NOUN_ADJECTIVE: u8 = NOUN | (0b_0010 << 4); // A Noun Form of An Adjective ( *Red* is a great color from *red* )

// const VERB_INFINITIVE: u8 = VERB | (0b_0000 << 4);
// const VERB_: u8 = VERB | (0b_0001 << 4);

// const ADJECTIVE_ADNOUN: u8 = ADJECTIVE | (0b_0000 << 4);
// const ADJECTIVE_ADVERB: u8 = ADJECTIVE | (0b_0001 << 4);

// const ARTICLE_INDEFINITE: u8 = ARTICLE | (0b_0000 << 4); // A, An, Some
// const ARTICLE_DEFINITE: u8 = ARTICLE | (0b_0001 << 4); // The
51
// Independent Clause Types: punctuation code held in bits 0-2 of a clause
// header byte, plus the flag in bit 3.

/// IC ending in `.`
const IC_STATE: u8 = 0b0000_0000;
/// IC ending in `?`
const IC_ASK: u8 = 0b0000_0001;
/// IC ending in `!`
const IC_EXCLAIM: u8 = 0b0000_0010;
/// IC ending in `‽`
const IC_SURPRISE: u8 = 0b0000_0011;
/// IC ending in `!?`
const IC_EXCLASK: u8 = 0b0000_0100;
/// IC ending in `?!`
const IC_ASKEXCL: u8 = 0b0000_0101;
/// IC ending in `....` (trailing off)
const IC_TRAIL: u8 = 0b0000_0110;
/// Next codepoint tells the punctuation.
const IC_OTHER: u8 = 0b0000_0111;

/// IC ending in no punctuation (bit 3 of the header byte).
const IC_N: u8 = 0b0000_1000;
63// const IC_NASK: u8 = 0b_0001; // IC ending in ?s, number defined by next codepoint
64// const IC_NEXCLAIM: u8 = 0b_0010; // IC ending in !s, number defined by next codepoint
65// const IC_NSURPRISE: u8 = 0b_0011; // IC ending in ‽s, number defined by next codepoint
66// const IC_NEXCLASK: u8 = 0b_0100; // IC ending in !?s, number defined by next codepoint
67// const IC_NASKEXCL: u8 = 0b_0101; // IC ending in ?!s, number defined by next codepoint
68// const IC_NTRAIL: u8 = 0b_0110; // IC ending in ...'s, number defined by next codepoint
// const IC_NOTHER: u8 = 0b_0111; // number defined by next codepoint; the codepoint after that tells the punctuation to repeat
70
// Clause container, held in bits 6-7 of a clause header byte.

/// Normal clause (container bits `00`).
const IC_C_NORM: u8 = 0b0000_0000;
/// Quoted clause (container bits `01`).
const IC_C_QUOTE: u8 = 0b0100_0000;
/// Parenthetical clause (container bits `10`).
const IC_C_PARENTHETICAL: u8 = 0b1000_0000;
/// Other forms (container bits `11`).
const IC_C_OTHER: u8 = 0b1100_0000;

// Clause completeness, held in bits 4-5 of a clause header byte.

/// Complete sentence
const IC_SENT: u8 = 0b0000_0000;
/// A noun phrase
const IC_WORD: u8 = 0b0001_0000;
/// Cut off sentence
const IC_COFF: u8 = 0b0010_0000;
// const IC_SENT_QUOTE: u8 = IC_C_QUOTE | IC_SENT; // Complete sentence
// const IC_WORD_QUOTE: u8 = IC_C_QUOTE | IC_WORD; // A noun phrase
// const IC_COFF_QUOTE: u8 = IC_C_QUOTE | IC_COFF; // Cut off sentence
// const IC_SENT_PARENTHETICAL: u8 = IC_C_PARENTHETICAL | IC_SENT; // Complete sentence
// const IC_WORD_PARENTHETICAL: u8 = IC_C_PARENTHETICAL | IC_WORD; // A noun phrase
// const IC_COFF_PARENTHETICAL: u8 = IC_C_PARENTHETICAL | IC_COFF; // Cut off sentence
85
/// Take the value out of `a`, treating `None` as truncated UTEM input.
///
/// # Panics
/// Panics with "Unexpected end of UTEM text." when `a` is `None`.
fn unwrap<T>(a: Option<T>) -> T {
	match a {
		Some(value) => value,
		None => panic!("Unexpected end of UTEM text."),
	}
}
90
91/// Decode UTEM as UTF8 using the specified language.
92pub fn decode(_/*language*/: Language, utem_text: &[u8]) -> String {
93	let mut text_it = utem_text.iter();
94	let mut string = String::new();
95
96	loop {
97		let ic_type = unwrap(text_it.next());
98
99		match ic_type & 0b_11_00_0000 {
100			IC_C_NORM => string.push('x'),
101			IC_C_QUOTE => string.push('"'),
102			IC_C_PARENTHETICAL => string.push('('),
103			IC_C_OTHER => string.push('_'), // placeholder
104			_ => unreachable!(),
105		}
106		match ic_type & 0b_00_11_0000 {
107			IC_SENT => string.push(';'),
108			IC_WORD => string.push('w'),
109			IC_COFF => string.push('-'),
110			_ => panic!("invalid UTEM"),
111		}
112		match ic_type & 0b_00_00_1000 {
113			IC_N => string.push('#'),
114			_ => string.push('1'),
115		}
116		match ic_type & 0b_00_00_0111 {
117			IC_STATE => string.push('.'),
118			IC_ASK => string.push('?'),
119			IC_EXCLAIM => string.push('!'),
120			IC_SURPRISE => string.push('‽'),
121			IC_EXCLASK => string.push('/'), // !?
122			IC_ASKEXCL => string.push('\\'), // ?!
123			IC_TRAIL => string.push('…'), // ...
124			IC_OTHER => string.push('_'), // placeholder
125			_ => unreachable!(),
126		}
127	}
128}
129
130/// Encode UTF-8 Text as UTEM Text
131pub fn encode(language: Language, utf8_text: &str) -> Vec<u8> {
132	match language {
133		Language::English => english::encode(utf8_text),
134		_ => panic!("Not Supported Yet")
135	}
136}
137
138/// Translate ran-slat into a language.
139pub fn translate(language: Language, ranslat: &str) -> String {
140	match language {
141		Language::English => english::translate(ranslat),
142		_ => panic!("Language not supported yet")
143	}
144}