hext/
lib.rs

1mod error;
2
3pub use crate::error::Error;
4use bitvec::prelude::*;
5use error::InvalidHeaderKind;
6use std::iter::Peekable;
7use std::num::ParseIntError;
8use std::str::Chars;
9
10#[derive(Debug, PartialEq)]
11struct Header {
12	bitorder: BitOrder,
13	byteorder: ByteOrder,
14	negativekind: NegativeKind,
15	pad_bits: bool,
16}
17
/// Bit order named in the document header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BitOrder {
	/// Header keyword `msb0`.
	Msb0,
	/// Header keyword `lsb0`.
	Lsb0,
}
23
/// Byte order named in the document header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ByteOrder {
	/// Header keyword `little-endian`.
	LittleEndian,
	/// Header keyword `big-endian`.
	BigEndian,
}
29
/// Representation of negative numbers named in the document header.
///
/// The usual spelling is "complement"; the variant names keep "compliment"
/// because that is how the header keywords are spelled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NegativeKind {
	/// Header keyword `twos-compliment`.
	TwosCompliment,
	/// Header keyword `ones-compliment`.
	OnesCompliment,
	/// Header keyword `sign-magnitude`.
	SignMagnitude,
}
36
/// Parser that turns hext source text into raw bytes.
///
/// A value starts out with an empty output buffer and is consumed by `parse`,
/// which returns the accumulated bytes.
#[derive(Debug, Default)]
pub struct Hext {
	/// Bytes produced so far.
	parsed: Vec<u8>,
}
40
41impl Hext {
42	pub fn new() -> Self {
43		Self { parsed: vec![] }
44	}
45
46	pub fn parse<S: AsRef<str>>(mut self, raw: S) -> Result<Vec<u8>, Error> {
47		let mut chars = raw.as_ref().chars().peekable();
48
49		// Clear through any leading comments or blank lines
50		Self::skip_nondata(&mut chars);
51
52		let header: Header;
53		loop {
54			match chars.next() {
55				Some('~') => {
56					header = Self::parse_header(Self::consume_line(&mut chars))?;
57					break;
58				}
59				Some(_) => return Err(Error::NoHeader),
60				None => return Ok(self.parsed), //todo: is this an error?
61			}
62		}
63
64		let mut bits: BitVec<u8, Msb0> = BitVec::new();
65		let mut state = State::ReadingHex;
66
67		loop {
68			match state {
69				State::ReadingHex => match chars.next_if(|&c| c != '.') {
70					Some('#') => Self::skip_line(&mut chars),
71					Some(c) if c.is_whitespace() => continue,
72
73					Some(high) if high.is_ascii_hexdigit() => {
74						match chars.next_if(|&c| c.is_ascii_hexdigit()) {
75							Some(low) => self.parsed.push(
76								((high.to_digit(16).unwrap() * 16) + low.to_digit(16).unwrap())
77									as u8,
78							),
79							None => return Err(Error::IncompleteOctet),
80						}
81					}
82
83					Some('=') => state = State::ReadingUnsizedDecimal,
84					Some('i') => state = State::ReadingSignedDecimal,
85					Some('u') => state = State::ReadingUnsignedDecimal,
86					Some('\"') => state = State::ReadingLiteral,
87					Some(c) => return Err(Error::InvalidCharacter(c)),
88
89					None => match chars.peek() {
90						Some('.') => state = State::ReadingBinary,
91						Some(_) => unreachable!(),
92						None => return Ok(self.parsed),
93					},
94				},
95
96				State::ReadingUnsizedDecimal => {
97					let decimal = Self::consume_until_whitespace(&mut chars);
98					state = State::ReadingHex;
99
100					let is_signed = if let Some(sign) = decimal.chars().next() {
101						sign == '-' || sign == '+'
102					} else {
103						// it was a lone =. Send the maybe-decimal string even
104						// though we know it's empty
105						return Err(Error::InvalidDecimal(decimal));
106					};
107
108					let mut bytes = if is_signed {
109						Self::signed_smallest_le_bytes(&decimal)
110					} else {
111						Self::unsigned_smallest_le_bytes(&decimal)
112					}
113					.map_err(|_e| Error::InvalidDecimal(decimal))?;
114
115					if header.byteorder == ByteOrder::BigEndian {
116						bytes.reverse();
117					}
118
119					self.parsed.extend_from_slice(&bytes);
120				}
121
122				State::ReadingSignedDecimal => {
123					let signed_decimal_string = Self::consume_until_whitespace(&mut chars);
124					state = State::ReadingHex;
125
126					let splits = signed_decimal_string.split_once('=');
127					match splits {
128						Some((bitness, value)) => {
129							let mut bytes = Self::signed_le_bytes(bitness, value)?;
130
131							if header.byteorder == ByteOrder::BigEndian {
132								bytes.reverse();
133							}
134
135							self.parsed.extend_from_slice(&bytes);
136						}
137						None => return Err(Error::InvalidSignedDecimal(signed_decimal_string)),
138					}
139				}
140
141				State::ReadingUnsignedDecimal => {
142					let signed_decimal_string = Self::consume_until_whitespace(&mut chars);
143					state = State::ReadingHex;
144
145					let splits = signed_decimal_string.split_once('=');
146					match splits {
147						Some((bitness, value)) => {
148							let mut bytes = Self::unsigned_le_bytes(bitness, value)?;
149
150							if header.byteorder == ByteOrder::BigEndian {
151								bytes.reverse();
152							}
153
154							self.parsed.extend_from_slice(&bytes);
155						}
156						None => return Err(Error::InvalidDecimal(signed_decimal_string)),
157					}
158				}
159
160				State::ReadingLiteral => match chars.next() {
161					Some('\"') => state = State::ReadingHex,
162					Some('\\') => match chars.next() {
163						Some(c) => match Self::escape(c) {
164							Some(c) => self.parsed.push(c as u8),
165							None => return Err(Error::InvalidEscape(c)),
166						},
167						None => return Err(Error::UnclosedStringLiteral),
168					},
169					Some('\n') => return Err(Error::UnclosedStringLiteral),
170					Some(c) => {
171						let mut encode = vec![0; c.len_utf8()];
172						c.encode_utf8(&mut encode);
173						self.parsed.extend_from_slice(&encode)
174					}
175					None => return Err(Error::UnclosedStringLiteral),
176				},
177
178				State::ReadingBinary => match chars.next_if(|&c| c == '.') {
179					Some('.') => loop {
180						match chars
181							.next_if(|&c| c == '1' || c == '0' || c == '#' || c.is_whitespace())
182						{
183							Some('0') => bits.push(false),
184							Some('1') => bits.push(true),
185							Some('#') => Self::skip_line(&mut chars),
186							Some(c) if c.is_whitespace() => {
187								Self::skip_nondata(&mut chars);
188								break;
189							}
190							Some(_) => return Err(Error::GarbageCharacterInBitstream),
191							None => break,
192						}
193					},
194					Some(_) => unreachable!(),
195					None => {
196						if bits.len() % 8 != 0 {
197							if !header.pad_bits {
198								eprintln!("{}", bits.len());
199								return Err(Error::UnalignedBits);
200							} else {
201								while bits.len() % 8 != 0 {
202									bits.insert(0, false);
203								}
204							}
205						}
206
207						self.parsed.extend_from_slice(bits.as_raw_slice());
208						bits = BitVec::new();
209
210						state = State::ReadingHex;
211					}
212				},
213			}
214		}
215	}
216
217	fn parse_header<S: AsRef<str>>(string: S) -> Result<Header, Error> {
218		let splits: Vec<&str> = string.as_ref().trim_end().split(' ').collect();
219
220		let mut bitorder = None;
221		let mut byteorder = None;
222		let mut negativekind = None;
223		let mut pad_bits = false;
224
225		for split in splits {
226			match split {
227				"msb0" => {
228					if bitorder.replace(BitOrder::Msb0).is_some() {
229						return Err(InvalidHeaderKind::TwoBitOrder.into());
230					}
231				}
232				"lsb0" => {
233					if bitorder.replace(BitOrder::Lsb0).is_some() {
234						return Err(InvalidHeaderKind::TwoBitOrder.into());
235					}
236				}
237				"big-endian" => {
238					if byteorder.replace(ByteOrder::BigEndian).is_some() {
239						return Err(InvalidHeaderKind::TwoByteOrder.into());
240					}
241				}
242				"little-endian" => {
243					if byteorder.replace(ByteOrder::LittleEndian).is_some() {
244						return Err(InvalidHeaderKind::TwoByteOrder.into());
245					}
246				}
247				"twos-compliment" => {
248					if negativekind.replace(NegativeKind::TwosCompliment).is_none() {
249						return Err(InvalidHeaderKind::TwoNegativeKind.into());
250					}
251				}
252				"ones-compliment" => {
253					if negativekind.replace(NegativeKind::OnesCompliment).is_none() {
254						return Err(InvalidHeaderKind::TwoNegativeKind.into());
255					}
256				}
257				"sign-magnitude" => {
258					if negativekind.replace(NegativeKind::SignMagnitude).is_none() {
259						return Err(InvalidHeaderKind::TwoNegativeKind.into());
260					}
261				}
262				"padbits" => pad_bits = true,
263				_ => return Err(InvalidHeaderKind::InvalidProperty(split.into()).into()),
264			}
265		}
266
267		if bitorder.is_none() {
268			return Err(InvalidHeaderKind::NoBitOrder.into());
269		} else if byteorder.is_none() {
270			return Err(InvalidHeaderKind::NoByteOrder.into());
271		} else {
272			Ok(Header {
273				bitorder: bitorder.unwrap(),
274				byteorder: byteorder.unwrap(),
275				negativekind: negativekind.unwrap_or(NegativeKind::TwosCompliment),
276				pad_bits,
277			})
278		}
279	}
280
281	fn escape(c: char) -> Option<char> {
282		match c {
283			'\"' => Some('\"'),
284			'\\' => Some('\\'),
285			'n' => Some('\n'),
286			'r' => Some('\r'),
287			't' => Some('\t'),
288			_ => None,
289		}
290	}
291
292	fn skip_nondata(mut chars: &mut Peekable<Chars>) {
293		loop {
294			match chars.peek() {
295				Some('#') => Self::skip_line(&mut chars),
296				Some(c) if c.is_whitespace() => {
297					chars.next();
298				}
299				_ => return,
300			};
301		}
302	}
303
304	fn skip_line(chars: &mut Peekable<Chars>) {
305		chars.find(|&c| c == '\n');
306	}
307
308	fn consume_line(chars: &mut Peekable<Chars>) -> String {
309		chars.take_while(|&c| c != '\n').collect()
310	}
311
312	fn consume_until_whitespace(chars: &mut Peekable<Chars>) -> String {
313		chars.take_while(|&c| !c.is_whitespace()).collect()
314	}
315
316	fn signed_le_bytes<S: AsRef<str>>(bitness: S, value: S) -> Result<Vec<u8>, Error> {
317		match bitness.as_ref() {
318			"8" => Ok(i8::from_str_radix(value.as_ref(), 10)
319				.map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
320				.to_le_bytes()
321				.to_vec()),
322			"16" => Ok(i16::from_str_radix(value.as_ref(), 10)
323				.map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
324				.to_le_bytes()
325				.to_vec()),
326			"32" => Ok(i32::from_str_radix(value.as_ref(), 10)
327				.map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
328				.to_le_bytes()
329				.to_vec()),
330			"64" => Ok(i64::from_str_radix(value.as_ref(), 10)
331				.map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
332				.to_le_bytes()
333				.to_vec()),
334			_ => return Err(Error::InvalidBitness(bitness.as_ref().to_string())),
335		}
336	}
337
338	fn signed_smallest_le_bytes<S: AsRef<str>>(string: S) -> Result<Vec<u8>, ParseIntError> {
339		let large: i64 = i64::from_str_radix(string.as_ref(), 10)?;
340
341		Ok(if large > i32::MAX as i64 || large < i32::MIN as i64 {
342			large.to_le_bytes().to_vec()
343		} else if large > i16::MAX as i64 || large < i16::MIN as i64 {
344			(large as i32).to_le_bytes().to_vec()
345		} else if large > i8::MAX as i64 || large < i8::MIN as i64 {
346			(large as i16).to_le_bytes().to_vec()
347		} else {
348			(large as i8).to_le_bytes().to_vec()
349		})
350	}
351
352	fn unsigned_le_bytes<S: AsRef<str>>(bitness: S, value: S) -> Result<Vec<u8>, Error> {
353		match bitness.as_ref() {
354			"8" => Ok(u8::from_str_radix(value.as_ref(), 10)
355				.map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
356				.to_le_bytes()
357				.to_vec()),
358			"16" => Ok(u16::from_str_radix(value.as_ref(), 10)
359				.map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
360				.to_le_bytes()
361				.to_vec()),
362			"32" => Ok(u32::from_str_radix(value.as_ref(), 10)
363				.map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
364				.to_le_bytes()
365				.to_vec()),
366			"64" => Ok(u64::from_str_radix(value.as_ref(), 10)
367				.map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
368				.to_le_bytes()
369				.to_vec()),
370			_ => return Err(Error::InvalidBitness(bitness.as_ref().to_string())),
371		}
372	}
373
374	fn unsigned_smallest_le_bytes<S: AsRef<str>>(string: S) -> Result<Vec<u8>, ParseIntError> {
375		let large: u64 = u64::from_str_radix(string.as_ref(), 10)?;
376
377		Ok(if large > u32::MAX as u64 {
378			large.to_le_bytes().to_vec()
379		} else if large > u16::MAX as u64 {
380			(large as u32).to_le_bytes().to_vec()
381		} else if large > u8::MAX as u64 {
382			(large as u16).to_le_bytes().to_vec()
383		} else {
384			(large as u8).to_le_bytes().to_vec()
385		})
386	}
387}
388
/// What the body parser in `Hext::parse` is currently reading.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
	/// Between items: hex octets, comments, whitespace, or the first
	/// character of another kind of item.
	ReadingHex,
	/// After `=`: a decimal without an explicit width.
	ReadingUnsizedDecimal,
	/// After `i`: a signed decimal written as `<bits>=<value>`.
	ReadingSignedDecimal,
	/// After `u`: an unsigned decimal written as `<bits>=<value>`.
	ReadingUnsignedDecimal,
	/// In front of, or between, `.`-prefixed runs of bits.
	ReadingBinary,
	/// Inside a double-quoted string literal.
	ReadingLiteral,
}
397
/// Unit tests for header parsing and for each kind of body item.
#[cfg(test)]
mod test {
	use super::*;

	//## Header tests ##
	#[test]
	fn parse_header_success() {
		// Recognizes the keywords...
		assert_eq!(
			Hext::parse_header("msb0 big-endian").unwrap(),
			Header {
				byteorder: ByteOrder::BigEndian,
				bitorder: crate::BitOrder::Msb0,
				negativekind: NegativeKind::TwosCompliment,
				pad_bits: false
			}
		);

		assert_eq!(
			Hext::parse_header("lsb0 little-endian").unwrap(),
			Header {
				byteorder: ByteOrder::LittleEndian,
				bitorder: crate::BitOrder::Lsb0,
				negativekind: NegativeKind::TwosCompliment,
				pad_bits: false
			}
		);

		// ...In either order
		assert_eq!(
			Hext::parse_header("big-endian lsb0").unwrap(),
			Header {
				byteorder: ByteOrder::BigEndian,
				bitorder: crate::BitOrder::Lsb0,
				negativekind: NegativeKind::TwosCompliment,
				pad_bits: false
			}
		);
	}

	#[test]
	fn parse_header_fail_twobits() {
		assert_eq!(
			Hext::parse_header("lsb0 msb0"),
			Err(InvalidHeaderKind::TwoBitOrder.into())
		)
	}

	#[test]
	fn parse_header_fail_twobytes() {
		assert_eq!(
			Hext::parse_header("little-endian big-endian"),
			Err(InvalidHeaderKind::TwoByteOrder.into())
		)
	}

	#[test]
	fn parse_header_fail_nobits() {
		assert_eq!(
			Hext::parse_header("big-endian"),
			Err(InvalidHeaderKind::NoBitOrder.into())
		)
	}

	#[test]
	fn parse_header_fail_nobytes() {
		assert_eq!(
			Hext::parse_header("msb0"),
			Err(InvalidHeaderKind::NoByteOrder.into())
		)
	}

	#[test]
	fn parse_header_fail_invalidproperty() {
		assert_eq!(
			Hext::parse_header("lsb0 big-endian invalidproperty"),
			Err(InvalidHeaderKind::InvalidProperty("invalidproperty".into()).into())
		)
	}

	//## Bytes tests ##
	#[test]
	fn test_empty_input() {
		// Input that ends before any header yields no bytes
		assert_eq!(Hext::new().parse("").unwrap(), Vec::<u8>::new());
	}

	#[test]
	fn test_onebyte() {
		let test = "~little-endian msb0\n41";
		let cmp = vec![0x41];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_only_comment() {
		let test = "~little-endian msb0\n# Comment";

		assert_eq!(Hext::new().parse(&test).unwrap(), vec![]);
	}

	#[test]
	fn test_1byte_comment() {
		let test = "~little-endian msb0\n41 #A";
		let cmp = vec![0x41];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_byte_nospace_comment() {
		let test = "~little-endian msb0\n41#A";
		let cmp = vec![0x41];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_2byte_multiline() {
		let test = "~little-endian msb0\n41\n42";
		let cmp = vec![0x41, 0x42];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_2bytes_nospace() {
		let test = "~little-endian msb0\n4142";
		let cmp = vec![0x41, 0x42];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	//## Bit Tests ##
	#[test]
	fn test_8bits() {
		let test = "~little-endian msb0\n.01000001";
		let cmp = vec![0x41];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_8bits_hex10() {
		let test = "~little-endian msb0\n.01000001 10";
		let cmp = vec![0x41, 0x10];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_8bit_comment() {
		let test = "~little-endian msb0\n.01000001 # A";
		let cmp = vec![0x41];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp)
	}

	#[test]
	fn test_8bit_nospace_comment() {
		let test = "~little-endian msb0\n.01000001#A";
		let cmp = vec![0x41];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_1bit() {
		let test = "~little-endian msb0 padbits\n.1";
		let cmp = vec![0x01];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn test_8bits_halved_space() {
		let test_space = "~little-endian msb0\n.0100 .0010";
		let cmp = vec![0x42];

		assert_eq!(Hext::new().parse(&test_space).unwrap(), cmp);
	}

	#[test]
	fn test_8bits_halved_line() {
		let test_line = "~little-endian msb0\n.0100\n.0010";
		let cmp = vec![0x42];

		assert_eq!(Hext::new().parse(&test_line).unwrap(), cmp);
	}

	#[test]
	fn test_8bits_halved_line_comments() {
		let test_line_comments = "~little-endian msb0\n.0100#Half of capital letter\n.0010 # B";
		let cmp = vec![0x42];

		assert_eq!(Hext::new().parse(&test_line_comments).unwrap(), cmp);
	}

	#[test]
	fn test_1bit_then_byte() {
		let test = "~little-endian msb0 padbits\n.1 41";
		let cmp = vec![0x01, 0x41];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	//## Literal Tests ##
	#[test]
	fn literal_multibyte() {
		let test = "~big-endian lsb0\n\"🥺\"";
		let cmp = vec![0xf0, 0x9f, 0xa5, 0xba];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn literal_escapes() {
		// The literal is: "a\n\t\""
		let test = "~big-endian lsb0\n\"a\\n\\t\\\"\"";
		let cmp = vec![b'a', b'\n', b'\t', b'"'];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	//## Decimal Tests ##
	#[test]
	fn decimal_unsized_u8() {
		let test = "~big-endian lsb0\n=200";
		let cmp = vec![200];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn decimal_unsized_i8() {
		let test = "~big-endian lsb0\n=-127";
		let cmp = (-127i8).to_be_bytes().to_vec();

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn decimal_unsized_u32() {
		let test = "~little-endian lsb0\n=65536";
		let cmp = 65536u32.to_le_bytes().to_vec();

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn decimal_unsized_i32() {
		let test = "~little-endian lsb0\n=-40000";
		let cmp = (-40000i32).to_le_bytes().to_vec();

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn decimal_sized_u16() {
		let test = "~little-endian lsb0\nu16=65534";
		let cmp = 65534u16.to_le_bytes().to_vec();

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn decimal_sized_u16_big_endian() {
		let test = "~big-endian lsb0\nu16=258";
		let cmp = vec![0x01, 0x02];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn decimal_sized_i8() {
		let test = "~little-endian lsb0\ni8=-1";
		let cmp = vec![0xff];

		assert_eq!(Hext::new().parse(&test).unwrap(), cmp);
	}

	#[test]
	fn decimal_overflow_sized_u16() {
		let test = "~little-endian lsb0\nu16=65536";

		assert_eq!(
			Hext::new().parse(&test).unwrap_err(),
			Error::InvalidUnsignedDecimal("65536".into())
		);
	}

	//## Everything ##
	#[test]
	fn everything() {
		let to_parse = std::fs::read_to_string("tests/everything.hxt").unwrap();
		let cmp = std::fs::read_to_string("tests/everything.correct")
			.unwrap()
			.into_bytes();

		assert_eq!(Hext::new().parse(&to_parse).unwrap(), cmp)
	}

	//## Failing Tests ##
	#[test]
	fn ftest_noheader() {
		let test = "41";

		assert_eq!(Hext::new().parse(&test).unwrap_err(), Error::NoHeader);
	}

	#[test]
	fn ftest_incompleteoctet() {
		let test = "~little-endian msb0\n4";

		assert_eq!(
			Hext::new().parse(&test).unwrap_err(),
			Error::IncompleteOctet
		);
	}

	#[test]
	fn ftest_invalidcharacter() {
		let test = "~little-endian msb0\nG";

		assert_eq!(
			Hext::new().parse(&test).unwrap_err(),
			Error::InvalidCharacter('G')
		);
	}

	#[test]
	fn ftest_invalid_bitness() {
		let test = "~little-endian msb0\nu12=5";

		assert_eq!(
			Hext::new().parse(&test).unwrap_err(),
			Error::InvalidBitness("12".into())
		);
	}

	#[test]
	fn ftest_unclosed_literal() {
		let test = "~little-endian msb0\n\"abc";

		assert_eq!(
			Hext::new().parse(&test).unwrap_err(),
			Error::UnclosedStringLiteral
		);
	}

	#[test]
	fn ftest_unaligned_bit() {
		let test = "~little-endian msb0\n.1";
		let cmp = Error::UnalignedBits;

		assert_eq!(Hext::new().parse(&test).unwrap_err(), cmp);
	}

	#[test]
	fn ftest_unaligned_bit_then_byte() {
		let test = "~little-endian msb0\n.1 41";
		let cmp = Error::UnalignedBits;

		assert_eq!(Hext::new().parse(&test).unwrap_err(), cmp);
	}
}