isla_lib/
zencode.rs

1// BSD 2-Clause License
2//
3// Copyright (c) 2019, 2020 Alasdair Armstrong
4//
5// All rights reserved.
6//
7// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions are
9// met:
10//
11// 1. Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13//
14// 2. Redistributions in binary form must reproduce the above copyright
15// notice, this list of conditions and the following disclaimer in the
16// documentation and/or other materials provided with the distribution.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30//! This module implements the name mangling scheme used by Sail
31//!
//! It allows all ASCII strings to be represented using just the
//! characters allowed in C identifiers. It works by encoding every
//! character that is not allowed in C identifiers as `zX`, where `X`
//! is some C allowed character. The letter `z` is encoded as `zz`.
//! Additionally, a `z` prefix is placed at the start of the string.
//! This prefix allows us to undo multiple rounds of encoding, which
//! can happen when Sail does multiple rounds of monomorphisation.
//! This works because we can keep decoding until the string either
//! has no `z` prefix, or has a `zz` prefix, in which case we can do
//! one more decode step and the first letter of the original string
//! was `z`.
42//!
43//! We could adapt this to support unicode by having something like
44//! `zu<codepoint>` for each unicode character, but as Sail does not
45//! allow unicode identifiers this is not supported at the moment.
46//!
//! The inspiration for this name-mangling scheme is GHC, see:
//! <https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/symbol-names>
49
/// Mangles `input` with the z-encoding scheme.
///
/// The result always starts with a `z` prefix. After the prefix, every byte
/// of `input` is either copied unchanged or replaced by `z` followed by a
/// single substitute byte:
///
/// * bytes `0..=41` are shifted up by 16 (so `' '..=')'` become `'0'..='9'`),
/// * bytes `42..=47` are shifted up by 23 (`'*'..='/'` become `'A'..='F'`),
/// * bytes `58..=64` are shifted up by 13 (`':'..='@'` become `'G'..='M'`),
/// * bytes `91..=94` and `96` are shifted down by 13 (`'['..='^'` become
///   `'N'..='Q'`, and the backtick becomes `'S'`),
/// * `z` itself becomes `zz`,
/// * bytes `123..=126` are shifted down by 39 (`'{'..='~'` become
///   `'T'..='W'`).
///
/// All other bytes (digits, letters other than `z`, `_`, byte 127 and
/// non-ASCII bytes) are emitted as-is.
pub fn encode(input: &str) -> String {
    const ESCAPE: u8 = b'z';

    let mut mangled: Vec<u8> = Vec::with_capacity(input.len() + 1);
    mangled.push(ESCAPE);

    for byte in input.bytes() {
        // `Some(code)` when the byte has to be escaped as `z` + `code`.
        let substitute = match byte {
            0..=41 => Some(byte + 16),
            42..=47 => Some(byte + 23),
            58..=64 => Some(byte + 13),
            91..=94 | 96 => Some(byte - 13),
            ESCAPE => Some(ESCAPE),
            123..=126 => Some(byte - 39),
            _ => None,
        };

        match substitute {
            Some(code) => mangled.extend_from_slice(&[ESCAPE, code]),
            None => mangled.push(byte),
        }
    }

    // ASCII bytes only ever map to ASCII bytes and non-ASCII bytes are copied
    // through in order, so the buffer is still valid UTF-8.
    String::from_utf8(mangled).unwrap()
}
78
/// Undoes one round of z-encoding.
///
/// The first character of `input` is treated as the `z` prefix and is
/// dropped without being checked. In the remainder, every `z` marks the
/// following byte as an escape code that is mapped back to the byte it
/// stands for; all other bytes are copied unchanged. A lone `z` at the very
/// end of the input has no escape code after it and is silently discarded.
///
/// An empty `input` (or one consisting only of the prefix) decodes to the
/// empty string.
///
/// # Panics
///
/// Escape codes are not validated. A code that the encoder in this module
/// never emits may overflow the byte arithmetic (which panics in builds with
/// overflow checks enabled) or yield bytes that are not valid UTF-8, in which
/// case the final conversion to `String` panics.
pub fn decode(input: &str) -> String {
    // Drop the prefix by character rather than by byte index, so that an
    // empty string or a multi-byte first character cannot cause a slicing
    // panic.
    let mut chars = input.chars();
    chars.next();
    let body = chars.as_str();

    let mut output = Vec::with_capacity(body.len());
    let mut next_encoded = false;
    for c in body.bytes() {
        if next_encoded {
            // Inverse of the shifts applied when encoding.
            let decoded = match c {
                0..=57 => c - 16,
                58..=70 => c - 23,
                71..=77 => c - 13,
                78..=83 => c + 13,
                b'z' => b'z',
                _ => c + 39,
            };
            output.push(decoded);
            next_encoded = false;
        } else if c == b'z' {
            next_encoded = true;
        } else {
            output.push(c);
        }
    }
    String::from_utf8(output).unwrap()
}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// `(encoded, plain)` pairs that must hold in both directions:
    /// `encode(plain) == encoded` and `decode(encoded) == plain`.
    const PAIRS: &[(&str, &str)] = &[
        ("zz0", " "),
        ("zz1", "!"),
        ("zz8", "("),
        ("zz9", ")"),
        ("zzA", "*"),
        ("zzB", "+"),
        ("zzE", "."),
        ("zzF", "/"),
        ("zzG", ":"),
        ("zzL", "?"),
        ("zzM", "@"),
        ("zzN", "["),
        ("zzO", "\\"),
        ("zzS", "`"),
        ("zzT", "{"),
        ("zzW", "~"),
        ("zzz", "z"),
        ("z_", "_"),
        ("za", "a"),
        ("zA", "A"),
        ("zZ", "Z"),
        ("z1", "1"),
        ("z9", "9"),
        ("zy", "y"),
        ("zz5i64zDzKz5i", "%i64->%i"),
    ];

    /// Inputs that `decode` accepts but `encode` never produces, because the
    /// plain character is valid in identifiers and is emitted unescaped.
    const DECODE_ONLY: &[(&str, &str)] = &[("zzR", "_")];

    #[test]
    fn zdecode() {
        for &(encoded, plain) in PAIRS.iter().chain(DECODE_ONLY) {
            assert_eq!(decode(encoded), plain, "decoding {:?}", encoded);
        }
    }

    #[test]
    fn zencode() {
        for &(encoded, plain) in PAIRS {
            assert_eq!(encode(plain), encoded, "encoding {:?}", plain);
        }
    }

    #[test]
    fn ascii_round_trip() {
        // Every single ASCII character must survive encode -> decode.
        for byte in 0u8..=127 {
            let plain = char::from(byte).to_string();
            assert_eq!(decode(&encode(&plain)), plain, "round trip of byte {}", byte);
        }

        // And so must all of them concatenated into one string.
        let all: String = (0u8..=127).map(char::from).collect();
        assert_eq!(decode(&encode(&all)), all);
    }
}