1mod error;
2
3pub use crate::error::Error;
4use bitvec::prelude::*;
5use error::InvalidHeaderKind;
6use std::iter::Peekable;
7use std::num::ParseIntError;
8use std::str::Chars;
9
10#[derive(Debug, PartialEq)]
11struct Header {
12 bitorder: BitOrder,
13 byteorder: ByteOrder,
14 negativekind: NegativeKind,
15 pad_bits: bool,
16}
17
/// Bit numbering named in the header.
#[derive(Debug, PartialEq)]
enum BitOrder {
    /// Selected by the `msb0` header keyword.
    Msb0,
    /// Selected by the `lsb0` header keyword.
    Lsb0,
}
23
/// Byte order named in the header.
#[derive(Debug, PartialEq)]
enum ByteOrder {
    /// Selected by the `little-endian` header keyword.
    LittleEndian,
    /// Selected by the `big-endian` header keyword.
    BigEndian,
}
29
/// Representation of negative numbers named in the header.
#[derive(Debug, PartialEq)]
enum NegativeKind {
    /// Selected by the `twos-compliment` header keyword.
    TwosCompliment,
    /// Selected by the `ones-compliment` header keyword.
    OnesCompliment,
    /// Selected by the `sign-magnitude` header keyword.
    SignMagnitude,
}
36
/// Parser that turns a hext document into the bytes it describes.
///
/// `Debug` and `Default` are derived so the type can be logged and can be
/// built through `Default::default()` as well as through its constructor.
#[derive(Debug, Default)]
pub struct Hext {
    /// Bytes produced so far by the parser.
    parsed: Vec<u8>,
}
40
41impl Hext {
42 pub fn new() -> Self {
43 Self { parsed: vec![] }
44 }
45
46 pub fn parse<S: AsRef<str>>(mut self, raw: S) -> Result<Vec<u8>, Error> {
47 let mut chars = raw.as_ref().chars().peekable();
48
49 Self::skip_nondata(&mut chars);
51
52 let header: Header;
53 loop {
54 match chars.next() {
55 Some('~') => {
56 header = Self::parse_header(Self::consume_line(&mut chars))?;
57 break;
58 }
59 Some(_) => return Err(Error::NoHeader),
60 None => return Ok(self.parsed), }
62 }
63
64 let mut bits: BitVec<u8, Msb0> = BitVec::new();
65 let mut state = State::ReadingHex;
66
67 loop {
68 match state {
69 State::ReadingHex => match chars.next_if(|&c| c != '.') {
70 Some('#') => Self::skip_line(&mut chars),
71 Some(c) if c.is_whitespace() => continue,
72
73 Some(high) if high.is_ascii_hexdigit() => {
74 match chars.next_if(|&c| c.is_ascii_hexdigit()) {
75 Some(low) => self.parsed.push(
76 ((high.to_digit(16).unwrap() * 16) + low.to_digit(16).unwrap())
77 as u8,
78 ),
79 None => return Err(Error::IncompleteOctet),
80 }
81 }
82
83 Some('=') => state = State::ReadingUnsizedDecimal,
84 Some('i') => state = State::ReadingSignedDecimal,
85 Some('u') => state = State::ReadingUnsignedDecimal,
86 Some('\"') => state = State::ReadingLiteral,
87 Some(c) => return Err(Error::InvalidCharacter(c)),
88
89 None => match chars.peek() {
90 Some('.') => state = State::ReadingBinary,
91 Some(_) => unreachable!(),
92 None => return Ok(self.parsed),
93 },
94 },
95
96 State::ReadingUnsizedDecimal => {
97 let decimal = Self::consume_until_whitespace(&mut chars);
98 state = State::ReadingHex;
99
100 let is_signed = if let Some(sign) = decimal.chars().next() {
101 sign == '-' || sign == '+'
102 } else {
103 return Err(Error::InvalidDecimal(decimal));
106 };
107
108 let mut bytes = if is_signed {
109 Self::signed_smallest_le_bytes(&decimal)
110 } else {
111 Self::unsigned_smallest_le_bytes(&decimal)
112 }
113 .map_err(|_e| Error::InvalidDecimal(decimal))?;
114
115 if header.byteorder == ByteOrder::BigEndian {
116 bytes.reverse();
117 }
118
119 self.parsed.extend_from_slice(&bytes);
120 }
121
122 State::ReadingSignedDecimal => {
123 let signed_decimal_string = Self::consume_until_whitespace(&mut chars);
124 state = State::ReadingHex;
125
126 let splits = signed_decimal_string.split_once('=');
127 match splits {
128 Some((bitness, value)) => {
129 let mut bytes = Self::signed_le_bytes(bitness, value)?;
130
131 if header.byteorder == ByteOrder::BigEndian {
132 bytes.reverse();
133 }
134
135 self.parsed.extend_from_slice(&bytes);
136 }
137 None => return Err(Error::InvalidSignedDecimal(signed_decimal_string)),
138 }
139 }
140
141 State::ReadingUnsignedDecimal => {
142 let signed_decimal_string = Self::consume_until_whitespace(&mut chars);
143 state = State::ReadingHex;
144
145 let splits = signed_decimal_string.split_once('=');
146 match splits {
147 Some((bitness, value)) => {
148 let mut bytes = Self::unsigned_le_bytes(bitness, value)?;
149
150 if header.byteorder == ByteOrder::BigEndian {
151 bytes.reverse();
152 }
153
154 self.parsed.extend_from_slice(&bytes);
155 }
156 None => return Err(Error::InvalidDecimal(signed_decimal_string)),
157 }
158 }
159
160 State::ReadingLiteral => match chars.next() {
161 Some('\"') => state = State::ReadingHex,
162 Some('\\') => match chars.next() {
163 Some(c) => match Self::escape(c) {
164 Some(c) => self.parsed.push(c as u8),
165 None => return Err(Error::InvalidEscape(c)),
166 },
167 None => return Err(Error::UnclosedStringLiteral),
168 },
169 Some('\n') => return Err(Error::UnclosedStringLiteral),
170 Some(c) => {
171 let mut encode = vec![0; c.len_utf8()];
172 c.encode_utf8(&mut encode);
173 self.parsed.extend_from_slice(&encode)
174 }
175 None => return Err(Error::UnclosedStringLiteral),
176 },
177
178 State::ReadingBinary => match chars.next_if(|&c| c == '.') {
179 Some('.') => loop {
180 match chars
181 .next_if(|&c| c == '1' || c == '0' || c == '#' || c.is_whitespace())
182 {
183 Some('0') => bits.push(false),
184 Some('1') => bits.push(true),
185 Some('#') => Self::skip_line(&mut chars),
186 Some(c) if c.is_whitespace() => {
187 Self::skip_nondata(&mut chars);
188 break;
189 }
190 Some(_) => return Err(Error::GarbageCharacterInBitstream),
191 None => break,
192 }
193 },
194 Some(_) => unreachable!(),
195 None => {
196 if bits.len() % 8 != 0 {
197 if !header.pad_bits {
198 eprintln!("{}", bits.len());
199 return Err(Error::UnalignedBits);
200 } else {
201 while bits.len() % 8 != 0 {
202 bits.insert(0, false);
203 }
204 }
205 }
206
207 self.parsed.extend_from_slice(bits.as_raw_slice());
208 bits = BitVec::new();
209
210 state = State::ReadingHex;
211 }
212 },
213 }
214 }
215 }
216
217 fn parse_header<S: AsRef<str>>(string: S) -> Result<Header, Error> {
218 let splits: Vec<&str> = string.as_ref().trim_end().split(' ').collect();
219
220 let mut bitorder = None;
221 let mut byteorder = None;
222 let mut negativekind = None;
223 let mut pad_bits = false;
224
225 for split in splits {
226 match split {
227 "msb0" => {
228 if bitorder.replace(BitOrder::Msb0).is_some() {
229 return Err(InvalidHeaderKind::TwoBitOrder.into());
230 }
231 }
232 "lsb0" => {
233 if bitorder.replace(BitOrder::Lsb0).is_some() {
234 return Err(InvalidHeaderKind::TwoBitOrder.into());
235 }
236 }
237 "big-endian" => {
238 if byteorder.replace(ByteOrder::BigEndian).is_some() {
239 return Err(InvalidHeaderKind::TwoByteOrder.into());
240 }
241 }
242 "little-endian" => {
243 if byteorder.replace(ByteOrder::LittleEndian).is_some() {
244 return Err(InvalidHeaderKind::TwoByteOrder.into());
245 }
246 }
247 "twos-compliment" => {
248 if negativekind.replace(NegativeKind::TwosCompliment).is_none() {
249 return Err(InvalidHeaderKind::TwoNegativeKind.into());
250 }
251 }
252 "ones-compliment" => {
253 if negativekind.replace(NegativeKind::OnesCompliment).is_none() {
254 return Err(InvalidHeaderKind::TwoNegativeKind.into());
255 }
256 }
257 "sign-magnitude" => {
258 if negativekind.replace(NegativeKind::SignMagnitude).is_none() {
259 return Err(InvalidHeaderKind::TwoNegativeKind.into());
260 }
261 }
262 "padbits" => pad_bits = true,
263 _ => return Err(InvalidHeaderKind::InvalidProperty(split.into()).into()),
264 }
265 }
266
267 if bitorder.is_none() {
268 return Err(InvalidHeaderKind::NoBitOrder.into());
269 } else if byteorder.is_none() {
270 return Err(InvalidHeaderKind::NoByteOrder.into());
271 } else {
272 Ok(Header {
273 bitorder: bitorder.unwrap(),
274 byteorder: byteorder.unwrap(),
275 negativekind: negativekind.unwrap_or(NegativeKind::TwosCompliment),
276 pad_bits,
277 })
278 }
279 }
280
281 fn escape(c: char) -> Option<char> {
282 match c {
283 '\"' => Some('\"'),
284 '\\' => Some('\\'),
285 'n' => Some('\n'),
286 'r' => Some('\r'),
287 't' => Some('\t'),
288 _ => None,
289 }
290 }
291
292 fn skip_nondata(mut chars: &mut Peekable<Chars>) {
293 loop {
294 match chars.peek() {
295 Some('#') => Self::skip_line(&mut chars),
296 Some(c) if c.is_whitespace() => {
297 chars.next();
298 }
299 _ => return,
300 };
301 }
302 }
303
304 fn skip_line(chars: &mut Peekable<Chars>) {
305 chars.find(|&c| c == '\n');
306 }
307
308 fn consume_line(chars: &mut Peekable<Chars>) -> String {
309 chars.take_while(|&c| c != '\n').collect()
310 }
311
312 fn consume_until_whitespace(chars: &mut Peekable<Chars>) -> String {
313 chars.take_while(|&c| !c.is_whitespace()).collect()
314 }
315
316 fn signed_le_bytes<S: AsRef<str>>(bitness: S, value: S) -> Result<Vec<u8>, Error> {
317 match bitness.as_ref() {
318 "8" => Ok(i8::from_str_radix(value.as_ref(), 10)
319 .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
320 .to_le_bytes()
321 .to_vec()),
322 "16" => Ok(i16::from_str_radix(value.as_ref(), 10)
323 .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
324 .to_le_bytes()
325 .to_vec()),
326 "32" => Ok(i32::from_str_radix(value.as_ref(), 10)
327 .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
328 .to_le_bytes()
329 .to_vec()),
330 "64" => Ok(i64::from_str_radix(value.as_ref(), 10)
331 .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))?
332 .to_le_bytes()
333 .to_vec()),
334 _ => return Err(Error::InvalidBitness(bitness.as_ref().to_string())),
335 }
336 }
337
338 fn signed_smallest_le_bytes<S: AsRef<str>>(string: S) -> Result<Vec<u8>, ParseIntError> {
339 let large: i64 = i64::from_str_radix(string.as_ref(), 10)?;
340
341 Ok(if large > i32::MAX as i64 || large < i32::MIN as i64 {
342 large.to_le_bytes().to_vec()
343 } else if large > i16::MAX as i64 || large < i16::MIN as i64 {
344 (large as i32).to_le_bytes().to_vec()
345 } else if large > i8::MAX as i64 || large < i8::MIN as i64 {
346 (large as i16).to_le_bytes().to_vec()
347 } else {
348 (large as i8).to_le_bytes().to_vec()
349 })
350 }
351
352 fn unsigned_le_bytes<S: AsRef<str>>(bitness: S, value: S) -> Result<Vec<u8>, Error> {
353 match bitness.as_ref() {
354 "8" => Ok(u8::from_str_radix(value.as_ref(), 10)
355 .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
356 .to_le_bytes()
357 .to_vec()),
358 "16" => Ok(u16::from_str_radix(value.as_ref(), 10)
359 .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
360 .to_le_bytes()
361 .to_vec()),
362 "32" => Ok(u32::from_str_radix(value.as_ref(), 10)
363 .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
364 .to_le_bytes()
365 .to_vec()),
366 "64" => Ok(u64::from_str_radix(value.as_ref(), 10)
367 .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
368 .to_le_bytes()
369 .to_vec()),
370 _ => return Err(Error::InvalidBitness(bitness.as_ref().to_string())),
371 }
372 }
373
374 fn unsigned_smallest_le_bytes<S: AsRef<str>>(string: S) -> Result<Vec<u8>, ParseIntError> {
375 let large: u64 = u64::from_str_radix(string.as_ref(), 10)?;
376
377 Ok(if large > u32::MAX as u64 {
378 large.to_le_bytes().to_vec()
379 } else if large > u16::MAX as u64 {
380 (large as u32).to_le_bytes().to_vec()
381 } else if large > u8::MAX as u64 {
382 (large as u16).to_le_bytes().to_vec()
383 } else {
384 (large as u8).to_le_bytes().to_vec()
385 })
386 }
387}
388
/// What the parser expects to read next.
enum State {
    /// Default state: hex octets, comments, whitespace, or a marker that
    /// switches to one of the other states.
    ReadingHex,
    /// Entered after `=`: a decimal whose width is chosen from its value.
    ReadingUnsizedDecimal,
    /// Entered after `i`: a `BITS=VALUE` signed decimal.
    ReadingSignedDecimal,
    /// Entered after `u`: a `BITS=VALUE` unsigned decimal.
    ReadingUnsignedDecimal,
    /// Entered when a `.` is next: a run of `0`/`1` bits.
    ReadingBinary,
    /// Entered after `"`: the inside of a string literal.
    ReadingLiteral,
}
397
#[cfg(test)]
mod test {
    use super::*;

    /// Runs a fresh parser over `input`.
    fn run(input: &str) -> Result<Vec<u8>, Error> {
        Hext::new().parse(input)
    }

    // ---- Header parsing -------------------------------------------------

    #[test]
    fn parse_header_success() {
        let cases = vec![
            (
                "msb0 big-endian",
                crate::BitOrder::Msb0,
                ByteOrder::BigEndian,
            ),
            (
                "lsb0 little-endian",
                crate::BitOrder::Lsb0,
                ByteOrder::LittleEndian,
            ),
            // Keyword order must not matter.
            (
                "big-endian lsb0",
                crate::BitOrder::Lsb0,
                ByteOrder::BigEndian,
            ),
        ];

        for (line, bitorder, byteorder) in cases {
            assert_eq!(
                Hext::parse_header(line).unwrap(),
                Header {
                    byteorder,
                    bitorder,
                    negativekind: NegativeKind::TwosCompliment,
                    pad_bits: false
                }
            );
        }
    }

    #[test]
    fn parse_header_fail_twobits() {
        assert_eq!(
            Hext::parse_header("lsb0 msb0"),
            Err(InvalidHeaderKind::TwoBitOrder.into())
        )
    }

    #[test]
    fn parse_header_fail_twobytes() {
        assert_eq!(
            Hext::parse_header("little-endian big-endian"),
            Err(InvalidHeaderKind::TwoByteOrder.into())
        )
    }

    #[test]
    fn parse_header_fail_nobits() {
        assert_eq!(
            Hext::parse_header("big-endian"),
            Err(InvalidHeaderKind::NoBitOrder.into())
        )
    }

    #[test]
    fn parse_header_fail_nobytes() {
        assert_eq!(
            Hext::parse_header("msb0"),
            Err(InvalidHeaderKind::NoByteOrder.into())
        )
    }

    #[test]
    fn parse_header_fail_invalidproperty() {
        assert_eq!(
            Hext::parse_header("lsb0 big-endian invalidproperty"),
            Err(InvalidHeaderKind::InvalidProperty("invalidproperty".into()).into())
        )
    }

    // ---- Hex octets -----------------------------------------------------

    #[test]
    fn test_onebyte() {
        assert_eq!(run("~little-endian msb0\n41").unwrap(), vec![0x41]);
    }

    #[test]
    fn test_only_comment() {
        assert_eq!(
            run("~little-endian msb0\n# Comment").unwrap(),
            Vec::<u8>::new()
        );
    }

    #[test]
    fn test_1byte_comment() {
        assert_eq!(run("~little-endian msb0\n41 #A").unwrap(), vec![0x41]);
    }

    #[test]
    fn test_byte_nospace_comment() {
        assert_eq!(run("~little-endian msb0\n41#A").unwrap(), vec![0x41]);
    }

    #[test]
    fn test_2byte_multiline() {
        assert_eq!(
            run("~little-endian msb0\n41\n42").unwrap(),
            vec![0x41, 0x42]
        );
    }

    #[test]
    fn test_2bytes_nospace() {
        assert_eq!(run("~little-endian msb0\n4142").unwrap(), vec![0x41, 0x42]);
    }

    // ---- Bit runs -------------------------------------------------------

    #[test]
    fn test_8bits() {
        assert_eq!(run("~little-endian msb0\n.01000001").unwrap(), vec![0x41]);
    }

    #[test]
    fn test_8bits_hex10() {
        assert_eq!(
            run("~little-endian msb0\n.01000001 10").unwrap(),
            vec![0x41, 0x10]
        );
    }

    #[test]
    fn test_8bit_comment() {
        assert_eq!(
            run("~little-endian msb0\n.01000001 # A").unwrap(),
            vec![0x41]
        )
    }

    #[test]
    fn test_8bit_nospace_comment() {
        assert_eq!(
            run("~little-endian msb0\n.01000001#A").unwrap(),
            vec![0x41]
        );
    }

    #[test]
    fn test_1bit() {
        assert_eq!(
            run("~little-endian msb0 padbits\n.1").unwrap(),
            vec![0x01]
        );
    }

    #[test]
    fn test_8bits_halved_space() {
        assert_eq!(
            run("~little-endian msb0\n.0100 .0010").unwrap(),
            vec![0x42]
        );
    }

    #[test]
    fn test_8bits_halved_line() {
        assert_eq!(
            run("~little-endian msb0\n.0100\n.0010").unwrap(),
            vec![0x42]
        );
    }

    #[test]
    fn test_8bits_halved_line_comments() {
        assert_eq!(
            run("~little-endian msb0\n.0100#Half of capital letter\n.0010 # B").unwrap(),
            vec![0x42]
        );
    }

    #[test]
    fn test_1bit_then_byte() {
        assert_eq!(
            run("~little-endian msb0 padbits\n.1 41").unwrap(),
            vec![0x01, 0x41]
        );
    }

    // ---- String literals ------------------------------------------------

    #[test]
    fn literal_multibyte() {
        assert_eq!(
            run("~big-endian lsb0\n\"🥺\"").unwrap(),
            vec![0xf0, 0x9f, 0xa5, 0xba]
        );
    }

    // ---- Decimals -------------------------------------------------------

    #[test]
    fn decimal_unsized_u8() {
        assert_eq!(run("~big-endian lsb0\n=200").unwrap(), vec![200]);
    }

    #[test]
    fn decimal_unsized_i8() {
        assert_eq!(
            run("~big-endian lsb0\n=-127").unwrap(),
            (-127i8).to_be_bytes().to_vec()
        );
    }

    #[test]
    fn decimal_unsized_u32() {
        assert_eq!(
            run("~little-endian lsb0\n=65536").unwrap(),
            65536u32.to_le_bytes().to_vec()
        );
    }

    #[test]
    fn decimal_unsized_i32() {
        assert_eq!(
            run("~little-endian lsb0\n=-40000").unwrap(),
            (-40000i32).to_le_bytes().to_vec()
        );
    }

    #[test]
    fn decimal_sized_u16() {
        assert_eq!(
            run("~little-endian lsb0\nu16=65534").unwrap(),
            65534u16.to_le_bytes().to_vec()
        );
    }

    #[test]
    fn decimal_overflow_sized_u16() {
        assert_eq!(
            run("~little-endian lsb0\nu16=65536").unwrap_err(),
            Error::InvalidUnsignedDecimal("65536".into())
        );
    }

    // ---- Whole-file fixture ---------------------------------------------

    #[test]
    fn everything() {
        let to_parse = std::fs::read_to_string("tests/everything.hxt").unwrap();
        let cmp = std::fs::read_to_string("tests/everything.correct")
            .unwrap()
            .into_bytes();

        assert_eq!(run(&to_parse).unwrap(), cmp)
    }

    // ---- Expected failures ----------------------------------------------

    #[test]
    fn ftest_incompleteoctet() {
        assert_eq!(
            run("~little-endian msb0\n4").unwrap_err(),
            Error::IncompleteOctet
        );
    }

    #[test]
    fn ftest_invalidcharacter() {
        assert_eq!(
            run("~little-endian msb0\nG").unwrap_err(),
            Error::InvalidCharacter('G')
        );
    }

    #[test]
    fn ftest_unaligned_bit() {
        assert_eq!(
            run("~little-endian msb0\n.1").unwrap_err(),
            Error::UnalignedBits
        );
    }

    #[test]
    fn ftest_unaligned_bit_then_byte() {
        assert_eq!(
            run("~little-endian msb0\n.1 41").unwrap_err(),
            Error::UnalignedBits
        );
    }
}