zuzu-rust 0.4.0

Rust implementation of ZuzuScript
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
=encoding utf8

=head1 NAME

std/string/quoted_printable - Quoted-printable encoders and decoders.

=head1 SYNOPSIS

  from std/string/quoted_printable import encode, decode;

  let raw := to_binary( "Hello, world!\r\n" );

  let text := encode(raw);
  let bytes := decode(text);

  let binary_text := encode(raw, binary: true);
  let short_lines := encode(raw, line_length: 40, newline: "\n");

=head1 IMPLEMENTATION SUPPORT

This module is supported by all implementations of ZuzuScript.

=head1 DESCRIPTION

This module provides quoted-printable encoding and decoding helpers for
RFC 2045-style byte transport. Encoding returns ASCII C<String> text.
Decoding returns a C<BinaryString>, because quoted-printable is a byte
transfer encoding rather than a Unicode text format.

The C<binary> option controls how line break bytes in the input are
encoded. In the default non-binary mode, each CRLF pair and each lone CR
or LF byte is emitted as the configured C<newline> string. In binary
mode, CR and LF bytes are encoded as C<=0D> and C<=0A> respectively.

=head1 EXPORTS

=head2 Functions

=over

=item * C<encode(BinaryString bytes, ... PairList options)>

Parameters: C<bytes> is binary input data and C<options> controls
encoding. Returns: C<String>. Encodes C<bytes> as quoted-printable ASCII
text.

=item * C<decode(String text, ... PairList options)>

Parameters: C<text> is quoted-printable text and C<options> controls
strictness. Returns: C<BinaryString>. Decodes quoted-printable text into
bytes.

=back

=head1 OPTIONS

=over

=item * C<line_length>

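Maximum encoded line length. The limit includes the trailing C<=> of a
soft line break but not the newline string itself. Defaults to C<76>
and must be a Number of at least C<4>.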

=item * C<newline>

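Output newline for hard line breaks and encoded soft breaks. Defaults
to CRLF. C<decode> only recognizes CRLF, a lone CR, or a lone LF after
a soft-break C<=>, so any other value produces text that C<decode> will
not turn back into the original bytes.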

=item * C<binary>

When true, encode CR and LF bytes as C<=0D> and C<=0A>. Defaults to
false.

=item * C<strict>

When true, malformed quoted-printable escape sequences throw during
decoding. Non-strict decoding preserves malformed escape text
literally. Defaults to false. Non-ASCII input always throws, regardless
of this option.

=back

=head1 COPYRIGHT AND LICENCE

B<< std/string/quoted_printable >> is copyright Toby Inkster.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut


let encode := null;
let decode := null;

{
	from std/string import chr, index, ord, substr;
	from std/string/base64 import
		encode as _base64_encode,
		decode as _base64_decode;

	let _B64_ALPHABET := "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		_ "abcdefghijklmnopqrstuvwxyz"
		_ "0123456789+/";
	let _HEX := "0123456789ABCDEF";

	function _div_floor ( Number n, Number d ) {
		let quotient := n / d;
		return floor(quotient);
	}

	function _mod ( Number n, Number d ) {
		let whole_parts := _div_floor( n, d );
		let covered := whole_parts * d;
		return n - covered;
	}

	function _bytes_to_binary ( Array bytes ) {
		let total := bytes.length();
		let encoded := "";
		let pos := 0;

		while ( pos < total ) {
			let first := bytes[pos];
			let second := ( pos + 1 < total ) ? bytes[pos + 1] : null;
			let third := ( pos + 2 < total ) ? bytes[pos + 2] : null;

			let sextet0 := _div_floor( first, 4 );
			let sextet1 := _mod( first, 4 ) * 16;
			let sextet2 := 64;
			let sextet3 := 64;

			if ( not( second == null ) ) {
				sextet1 += _div_floor( second, 16 );
				sextet2 := _mod( second, 16 ) * 4;
				if ( not( third == null ) ) {
					sextet2 += _div_floor( third, 64 );
					sextet3 := _mod( third, 64 );
				}
			}

			let quad := substr( _B64_ALPHABET, sextet0, 1 )
				_ substr( _B64_ALPHABET, sextet1, 1 );

			if ( sextet2 == 64 ) {
				quad _= "=";
			}
			else {
				quad _= substr( _B64_ALPHABET, sextet2, 1 );
			}

			if ( sextet3 == 64 ) {
				quad _= "=";
			}
			else {
				quad _= substr( _B64_ALPHABET, sextet3, 1 );
			}

			encoded _= quad;
			pos += 3;
		}

		return _base64_decode(encoded);
	}

	function _binary_to_bytes ( BinaryString raw ) {
		let encoded := _base64_encode(raw);
		let total := length encoded;
		let bytes := [];
		let pos := 0;

		while ( pos < total ) {
			let sym2 := substr( encoded, pos + 2, 1 );
			let sym3 := substr( encoded, pos + 3, 1 );

			let v0 := index( _B64_ALPHABET, substr( encoded, pos, 1 ) );
			let v1 := index( _B64_ALPHABET, substr( encoded, pos + 1, 1 ) );
			let v2 := ( sym2 eq "=" ) ? -1 : index( _B64_ALPHABET, sym2 );
			let v3 := ( sym3 eq "=" ) ? -1 : index( _B64_ALPHABET, sym3 );

			let byte0 := v0 * 4 + _div_floor( v1, 16 );
			bytes.push(byte0);

			if ( v2 >= 0 ) {
				let byte1 := _mod( v1, 16 ) * 16 + _div_floor( v2, 4 );
				bytes.push(byte1);
			}
			if ( v3 >= 0 ) {
				let byte2 := _mod( v2, 4 ) * 64 + v3;
				bytes.push(byte2);
			}

			pos += 4;
		}

		return bytes;
	}

	function _parse_options ( PairList options ) {
		let width := 76;
		let eol := "\r\n";
		let raw_mode := false;
		let strict_mode := false;

		for ( let entry in options.enumerate() ) {
			let key := entry.key;
			let supplied := entry.value;

			if ( key eq "strict" ) {
				strict_mode := supplied;
			}
			else if ( key eq "binary" ) {
				raw_mode := supplied;
			}
			else if ( key eq "newline" ) {
				eol := supplied;
			}
			else if ( key eq "line_length" ) {
				width := supplied;
			}
			else {
				die `quoted_printable option '${key}' is not supported`;
			}
		}

		if ( not( width instanceof Number ) ) {
			die "quoted_printable line_length option expects Number";
		}
		if ( width < 4 ) {
			die "quoted_printable line_length option must be at least 4";
		}
		if ( not( eol instanceof String ) ) {
			die "quoted_printable newline option expects String";
		}
		if ( not( raw_mode instanceof Boolean ) ) {
			die "quoted_printable binary option expects Boolean";
		}
		if ( not( strict_mode instanceof Boolean ) ) {
			die "quoted_printable strict option expects Boolean";
		}

		let whole_width := int(width);

		return {
			line_length: whole_width,
			newline: eol,
			binary: raw_mode,
			strict: strict_mode,
		};
	}

	function _is_safe_literal ( Number b ) {
		if ( b < 33 or b > 126 ) {
			return false;
		}
		return ( b <= 60 or b >= 62 );
	}

	function _byte_to_hex_token ( Number b ) {
		let high_digit := substr( _HEX, _div_floor( b, 16 ), 1 );
		let low_digit := substr( _HEX, _mod( b, 16 ), 1 );
		return "=" _ high_digit _ low_digit;
	}

	function _simple_token_length ( Number b, Boolean final_byte ) {
		let is_blank := ( b == 9 or b == 32 );

		if ( is_blank and not final_byte ) {
			return 1;
		}
		if ( not( _is_safe_literal(b) ) ) {
			return 3;
		}
		return 1;
	}

	function _space_tab_needs_escape (
		Array bytes,
		Number i,
		Number column,
		Number line_length,
	) {
		let total := bytes.length();
		let following := i + 1;

		if ( following >= total ) {
			return true;
		}

		let scan := following;
		let only_blanks := true;
		while ( only_blanks and scan < total ) {
			let probe := bytes[scan];
			if ( probe == 9 or probe == 32 ) {
				scan++;
			}
			else {
				only_blanks := false;
			}
		}
		if ( only_blanks ) {
			return true;
		}

		let next_is_last := following + 1 >= total;
		let limit := line_length - 1;
		if ( next_is_last ) {
			limit := line_length;
		}

		let next_width := _simple_token_length( bytes[following], next_is_last );
		let needed := column + 1 + next_width;
		return needed > limit;
	}

	function _token_for_byte (
		Array bytes,
		Number i,
		Number column,
		Number line_length,
	) {
		let value := bytes[i];
		let literal_ok := false;

		if ( value == 9 or value == 32 ) {
			let must_escape := _space_tab_needs_escape(
				bytes,
				i,
				column,
				line_length,
			);
			literal_ok := not( must_escape );
		}
		else {
			literal_ok := _is_safe_literal(value);
		}

		if ( literal_ok ) {
			return chr(value);
		}
		return _byte_to_hex_token(value);
	}

	function _emit_token (
		String out,
		Number column,
		String token,
		Boolean more_after,
		Number line_length,
		String newline,
	) {
		let token_width := length token;
		let limit := line_length;
		if ( more_after ) {
			limit := line_length - 1;
		}

		let text := out;
		let position := column;
		let needs_break := ( column > 0 and column + token_width > limit );

		if ( needs_break ) {
			text := out _ "=" _ newline;
			position := 0;
		}

		text := text _ token;
		position := position + token_width;

		return [ text, position ];
	}

	function _encode_segment (
		Array bytes,
		Number line_length,
		String newline,
	) {
		let total := bytes.length();
		let encoded := "";
		let position := 0;
		let cursor := 0;

		while ( cursor < total ) {
			let has_more := cursor + 1 < total;
			let piece := _token_for_byte( bytes, cursor, position, line_length );
			let state := _emit_token(
				encoded,
				position,
				piece,
				has_more,
				line_length,
				newline,
			);

			encoded := state[0];
			position := state[1];
			cursor++;
		}

		return encoded;
	}

	function _encode_text_bytes (
		Array bytes,
		Number line_length,
		String newline,
	) {
		let total := bytes.length();
		let encoded := "";
		let pending := [];
		let cursor := 0;

		while ( cursor < total ) {
			let value := bytes[cursor];
			cursor++;

			if ( not( value == 13 or value == 10 ) ) {
				pending.push(value);
				next;
			}

			let finished := _encode_segment( pending, line_length, newline );
			encoded _= finished _ newline;
			pending := [];

			if ( value == 13 and cursor < total and bytes[cursor] == 10 ) {
				cursor++;
			}
		}

		let remainder := _encode_segment( pending, line_length, newline );
		return encoded _ remainder;
	}

	function _hex_value ( String ch ) {
		let code := ord(ch);
		let digit := -1;

		if ( code >= 48 and code <= 57 ) {
			digit := code - 48;
		}
		else if ( code >= 65 and code <= 70 ) {
			digit := code - 55;
		}
		else if ( code >= 97 and code <= 102 ) {
			digit := code - 87;
		}

		return digit;
	}

	function _decode_text_bytes ( String text, Boolean strict ) {
		let total := length text;
		let bytes := [];
		let cursor := 0;

		while ( cursor < total ) {
			let here := substr( text, cursor, 1 );
			let code := ord( text, cursor );

			if ( code > 127 ) {
				die "quoted_printable.decode rejects non-ASCII input";
			}

			if ( here ne "=" ) {
				bytes.push(code);
				cursor++;
				next;
			}

			let consumed := 0;

			if ( cursor + 1 < total ) {
				let after := substr( text, cursor + 1, 1 );

				if ( after eq "\r" ) {
					consumed := 2;
					if ( cursor + 2 < total and substr( text, cursor + 2, 1 ) eq "\n" ) {
						consumed := 3;
					}
				}
				else if ( after eq "\n" ) {
					consumed := 2;
				}
				else if ( cursor + 2 < total ) {
					let high := _hex_value(after);
					let low := _hex_value( substr( text, cursor + 2, 1 ) );
					if ( high >= 0 and low >= 0 ) {
						bytes.push( high * 16 + low );
						consumed := 3;
					}
				}
			}

			if ( consumed == 0 ) {
				die "malformed quoted-printable escape" if strict;
				bytes.push(61);
				consumed := 1;
			}

			cursor += consumed;
		}

		return bytes;
	}

	encode := function ( BinaryString bytes, ... PairList options ) {
		let settings := _parse_options(options);
		let data := _binary_to_bytes(bytes);
		let width := settings{line_length};
		let eol := settings{newline};

		if ( not( settings{binary} ) ) {
			return _encode_text_bytes( data, width, eol );
		}

		return _encode_segment( data, width, eol );
	};

	decode := function ( String text, ... PairList options ) {
		let settings := _parse_options(options);
		let decoded := _decode_text_bytes( text, settings{strict} );
		return _bytes_to_binary(decoded);
	};
}