libmagic_rs/parser/types.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
//! Type keyword parsing for magic file types
//!
//! This module parses magic file type keywords (`byte`, `short`, `long`,
//! `quad`, `float`, `double`, `date`, `string`, and so on) and classifies
//! them into their corresponding [`TypeKind`] representations. Keyword
//! recognition lives here, separate from the grammar module, so that
//! type-specific logic stays cohesive and manageable as new types are added.
11
12use nom::{IResult, Parser, branch::alt, bytes::complete::tag};
13
14use crate::parser::ast::{Endianness, MetaType, PStringLengthWidth, TypeKind};
15
16/// Error returned by [`type_keyword_to_kind`] when the supplied keyword is
17/// not a recognized magic type keyword.
18///
19/// This is a tight, structured error surfaced from a pure mapping function
20/// that has no access to line-number context. Callers that *do* have line
21/// context (e.g. the grammar layer wrapping a higher-level parse) can
22/// convert it into a richer [`crate::error::ParseError`] variant if needed.
23/// The struct is `#[non_exhaustive]` so future fields (e.g. suggested
24/// alternatives) can be added without a major version bump.
25///
26/// # Examples
27///
28/// ```
29/// use libmagic_rs::parser::types::{type_keyword_to_kind, UnknownTypeKeyword};
30///
31/// let err = type_keyword_to_kind("notarealtype").unwrap_err();
32/// assert_eq!(err.keyword, "notarealtype");
33/// ```
34#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
35#[non_exhaustive]
36#[error("unknown type keyword: {keyword}")]
37pub struct UnknownTypeKeyword {
38 /// The keyword string that was not recognized.
39 pub keyword: String,
40}
41
42/// Parse a type keyword from magic file input
43///
44/// Recognizes all supported type keywords and returns the matched keyword string.
45/// Type keywords are organized by bit width (64, 32, 16, 8 bits) with longest
46/// prefixes matched first within each group to avoid ambiguous partial matches.
47///
48/// # Supported Keywords
49///
50/// - 64-bit: `ubequad`, `ulequad`, `uquad`, `bequad`, `lequad`, `quad`
51/// - 32-bit: `ubelong`, `ulelong`, `ulong`, `belong`, `lelong`, `long`
52/// - 16-bit: `ubeshort`, `uleshort`, `ushort`, `beshort`, `leshort`, `short`
53/// - 8-bit: `ubyte`, `byte`
54/// - String: `pstring`, `string`
55///
56/// # Examples
57///
58/// ```
59/// use libmagic_rs::parser::types::parse_type_keyword;
60///
61/// let (rest, keyword) = parse_type_keyword("bequad rest").unwrap();
62/// assert_eq!(keyword, "bequad");
63/// assert_eq!(rest, " rest");
64/// ```
65///
66/// # Errors
67///
68/// Returns a nom parsing error if the input doesn't start with a known type keyword.
69pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
70 alt((
71 // 64-bit types (6 branches)
72 alt((
73 tag("ubequad"),
74 tag("ulequad"),
75 tag("uquad"),
76 tag("bequad"),
77 tag("lequad"),
78 tag("quad"),
79 )),
80 // 32-bit types (6 branches)
81 alt((
82 tag("ubelong"),
83 tag("ulelong"),
84 tag("ulong"),
85 tag("belong"),
86 tag("lelong"),
87 tag("long"),
88 )),
89 // 16-bit types (6 branches)
90 alt((
91 tag("ubeshort"),
92 tag("uleshort"),
93 tag("ushort"),
94 tag("beshort"),
95 tag("leshort"),
96 tag("short"),
97 )),
98 // 8-bit types (2 branches)
99 alt((tag("ubyte"), tag("byte"))),
100 // Float/double types (6 branches)
101 alt((
102 tag("bedouble"),
103 tag("ledouble"),
104 tag("double"),
105 tag("befloat"),
106 tag("lefloat"),
107 tag("float"),
108 )),
109 // Date types -- 32-bit (date) and 64-bit (qdate)
110 alt((
111 tag("beqldate"),
112 tag("leqldate"),
113 tag("beqdate"),
114 tag("leqdate"),
115 tag("qldate"),
116 tag("qdate"),
117 tag("beldate"),
118 tag("leldate"),
119 tag("bedate"),
120 tag("ldate"),
121 tag("ledate"),
122 tag("date"),
123 )),
124 // String types (and regex/search, which share the string-type family).
125 //
126 // `lestring16`/`bestring16` are listed before `string` because nom
127 // tries each tag in order and we need the longer keyword to win when
128 // both could plausibly match -- in practice the prefixes (`lestring`,
129 // `bestring`) don't collide with anything else, but ordering by
130 // length is the safer pattern as more keywords are added.
131 alt((
132 tag("lestring16"),
133 tag("bestring16"),
134 tag("pstring"),
135 tag("search"),
136 tag("regex"),
137 tag("string"),
138 )),
139 // Meta / control-flow directives. `indirect` is listed first so the
140 // longest match is tried before `default`, `clear`, `name`, `use`;
141 // none of these collide with other supported keywords.
142 //
143 // `offset` is recognized here so the parser can accept magic files
144 // that use it (e.g. `searchbug.magic`). It maps to
145 // `TypeKind::Meta(MetaType::Offset)` and is fully evaluated by the
146 // engine: the resolved offset is emitted as `Value::Uint(position)`
147 // and participates in printf-style format substitution.
148 alt((
149 tag("indirect"),
150 tag("default"),
151 tag("offset"),
152 tag("clear"),
153 tag("name"),
154 tag("use"),
155 )),
156 ))
157 .parse(input)
158}
159
160/// Convert a type keyword string to its corresponding [`TypeKind`]
161///
162/// Maps a previously parsed type keyword (from [`parse_type_keyword`]) to the
163/// appropriate `TypeKind` variant with correct endianness and signedness settings.
164///
165/// # Conventions
166///
167/// - Unprefixed types are signed (libmagic default): `byte`, `short`, `long`, `quad`
168/// - `u` prefix indicates unsigned: `ubyte`, `ushort`, `ulong`, `uquad`
169/// - `be` prefix indicates big-endian: `beshort`, `belong`, `bequad`
170/// - `le` prefix indicates little-endian: `leshort`, `lelong`, `lequad`
171/// - No endian prefix means native endianness
172///
173/// Returns `Ok(None)` for `regex` and `search`, which cannot be constructed
174/// from the keyword alone -- they require suffix parsing (flags/count
175/// for regex, mandatory `NonZeroUsize` range for search) that only
176/// happens in `parser::grammar::parse_type_and_operator`. Callers that
177/// need a complete `TypeKind::Regex` or `TypeKind::Search` must build
178/// it directly in the grammar layer, not via this function.
179///
180/// # Examples
181///
182/// ```
183/// use libmagic_rs::parser::types::type_keyword_to_kind;
184/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
185///
186/// assert_eq!(type_keyword_to_kind("byte"), Ok(Some(TypeKind::Byte { signed: true })));
187/// assert_eq!(type_keyword_to_kind("ubyte"), Ok(Some(TypeKind::Byte { signed: false })));
188/// assert_eq!(
189/// type_keyword_to_kind("beshort"),
190/// Ok(Some(TypeKind::Short { endian: Endianness::Big, signed: true }))
191/// );
192/// // regex/search require suffix parsing, so the keyword alone returns Ok(None).
193/// assert_eq!(type_keyword_to_kind("regex"), Ok(None));
194/// assert_eq!(type_keyword_to_kind("search"), Ok(None));
195/// // Unknown keywords return a structured error.
196/// assert!(type_keyword_to_kind("bogus").is_err());
197/// ```
198///
199/// # Returns
200///
201/// * `Ok(Some(TypeKind))` for fully-specified keywords (byte, short, long,
202/// quad, float, double, date, qdate, string, pstring and all their
203/// variants).
204/// * `Ok(None)` for suffix-required keywords (`regex`, `search`) which
205/// cannot be converted from the keyword alone -- the grammar layer
206/// builds their `TypeKind` directly after parsing the suffix.
207/// * `Err(UnknownTypeKeyword)` if `type_name` is not a recognized
208/// keyword. Under normal control flow this only happens when a caller
209/// bypasses [`parse_type_keyword`] (which is the only supported way
210/// to produce valid input for this function).
211///
212/// # Errors
213///
214/// Returns [`UnknownTypeKeyword`] when `type_name` is not one of the
215/// keywords recognized by [`parse_type_keyword`]. This replaces a prior
216/// `unreachable!` panic; library code must never panic on untrusted
217/// input, and the structured error lets callers translate the failure
218/// into their own error type (e.g. a nom parse error or a richer
219/// `ParseError::InvalidType`).
220pub fn type_keyword_to_kind(type_name: &str) -> Result<Option<TypeKind>, UnknownTypeKeyword> {
221 // `regex` and `search` cannot be constructed from the keyword alone.
222 // They require suffix parsing (flags/count for regex, mandatory
223 // `NonZeroUsize` range for search) which only happens in
224 // `parse_type_and_operator` in grammar/mod.rs. Returning `None`
225 // here makes the "keyword alone isn't enough" invariant
226 // type-enforced instead of relying on a placeholder that the
227 // grammar layer is expected to overwrite.
228 //
229 // `name` and `use` also return `Ok(None)` because their identifier
230 // suffix is parsed in the grammar layer, following the same
231 // "keyword alone isn't enough" pattern.
232 if matches!(type_name, "regex" | "search" | "name" | "use") {
233 return Ok(None);
234 }
235
236 // Meta / control-flow directives with no trailing operand are fully
237 // specified by the keyword alone. `offset` maps to
238 // `MetaType::Offset` which the engine evaluates by emitting the
239 // resolved file position as `Value::Uint` for format substitution.
240 match type_name {
241 "default" => return Ok(Some(TypeKind::Meta(MetaType::Default))),
242 "clear" => return Ok(Some(TypeKind::Meta(MetaType::Clear))),
243 "indirect" => return Ok(Some(TypeKind::Meta(MetaType::Indirect))),
244 "offset" => return Ok(Some(TypeKind::Meta(MetaType::Offset))),
245 _ => {}
246 }
247
248 if let Some(kind) = byte_family(type_name)
249 .or_else(|| short_family(type_name))
250 .or_else(|| long_family(type_name))
251 .or_else(|| quad_family(type_name))
252 .or_else(|| float_family(type_name))
253 .or_else(|| double_family(type_name))
254 .or_else(|| date_family(type_name))
255 .or_else(|| qdate_family(type_name))
256 .or_else(|| string_family(type_name))
257 .or_else(|| string16_family(type_name))
258 {
259 return Ok(Some(kind));
260 }
261
262 Err(UnknownTypeKeyword {
263 keyword: type_name.to_string(),
264 })
265}
266
267/// Map a byte-family keyword (`byte`, `ubyte`) to its `TypeKind`.
268fn byte_family(name: &str) -> Option<TypeKind> {
269 match name {
270 "byte" => Some(TypeKind::Byte { signed: true }),
271 "ubyte" => Some(TypeKind::Byte { signed: false }),
272 _ => None,
273 }
274}
275
276/// Map a short-family keyword (`short`/`ushort`/`beshort`/...) to its `TypeKind`.
277fn short_family(name: &str) -> Option<TypeKind> {
278 let (endian, signed) = match name {
279 "short" => (Endianness::Native, true),
280 "ushort" => (Endianness::Native, false),
281 "leshort" => (Endianness::Little, true),
282 "uleshort" => (Endianness::Little, false),
283 "beshort" => (Endianness::Big, true),
284 "ubeshort" => (Endianness::Big, false),
285 _ => return None,
286 };
287 Some(TypeKind::Short { endian, signed })
288}
289
290/// Map a long-family keyword (`long`/`ulong`/`belong`/...) to its `TypeKind`.
291fn long_family(name: &str) -> Option<TypeKind> {
292 let (endian, signed) = match name {
293 "long" => (Endianness::Native, true),
294 "ulong" => (Endianness::Native, false),
295 "lelong" => (Endianness::Little, true),
296 "ulelong" => (Endianness::Little, false),
297 "belong" => (Endianness::Big, true),
298 "ubelong" => (Endianness::Big, false),
299 _ => return None,
300 };
301 Some(TypeKind::Long { endian, signed })
302}
303
304/// Map a quad-family keyword (`quad`/`uquad`/`bequad`/...) to its `TypeKind`.
305fn quad_family(name: &str) -> Option<TypeKind> {
306 let (endian, signed) = match name {
307 "quad" => (Endianness::Native, true),
308 "uquad" => (Endianness::Native, false),
309 "lequad" => (Endianness::Little, true),
310 "ulequad" => (Endianness::Little, false),
311 "bequad" => (Endianness::Big, true),
312 "ubequad" => (Endianness::Big, false),
313 _ => return None,
314 };
315 Some(TypeKind::Quad { endian, signed })
316}
317
318/// Map a float-family keyword (`float`/`befloat`/`lefloat`) to its `TypeKind`.
319fn float_family(name: &str) -> Option<TypeKind> {
320 let endian = match name {
321 "float" => Endianness::Native,
322 "befloat" => Endianness::Big,
323 "lefloat" => Endianness::Little,
324 _ => return None,
325 };
326 Some(TypeKind::Float { endian })
327}
328
329/// Map a double-family keyword (`double`/`bedouble`/`ledouble`) to its `TypeKind`.
330fn double_family(name: &str) -> Option<TypeKind> {
331 let endian = match name {
332 "double" => Endianness::Native,
333 "bedouble" => Endianness::Big,
334 "ledouble" => Endianness::Little,
335 _ => return None,
336 };
337 Some(TypeKind::Double { endian })
338}
339
340/// Map a 32-bit date keyword (`date`/`ldate`/`bedate`/...) to its `TypeKind`.
341fn date_family(name: &str) -> Option<TypeKind> {
342 let (endian, utc) = match name {
343 "date" => (Endianness::Native, true),
344 "ldate" => (Endianness::Native, false),
345 "bedate" => (Endianness::Big, true),
346 "beldate" => (Endianness::Big, false),
347 "ledate" => (Endianness::Little, true),
348 "leldate" => (Endianness::Little, false),
349 _ => return None,
350 };
351 Some(TypeKind::Date { endian, utc })
352}
353
354/// Map a 64-bit date keyword (`qdate`/`qldate`/`beqdate`/...) to its `TypeKind`.
355fn qdate_family(name: &str) -> Option<TypeKind> {
356 let (endian, utc) = match name {
357 "qdate" => (Endianness::Native, true),
358 "qldate" => (Endianness::Native, false),
359 "beqdate" => (Endianness::Big, true),
360 "beqldate" => (Endianness::Big, false),
361 "leqdate" => (Endianness::Little, true),
362 "leqldate" => (Endianness::Little, false),
363 _ => return None,
364 };
365 Some(TypeKind::QDate { endian, utc })
366}
367
368/// Map a string-family keyword (`string`, `pstring`) to its `TypeKind`.
369///
370/// `pstring` defaults to a 1-byte length prefix; the grammar layer
371/// overwrites `length_width` / `length_includes_itself` from any
372/// trailing `/B`/`/H`/`/h`/`/L`/`/l`/`/J` suffix.
373fn string_family(name: &str) -> Option<TypeKind> {
374 match name {
375 "string" => Some(TypeKind::String { max_length: None }),
376 "pstring" => Some(TypeKind::PString {
377 max_length: None,
378 length_width: PStringLengthWidth::OneByte,
379 length_includes_itself: false,
380 }),
381 _ => None,
382 }
383}
384
385/// Map a UCS-2 string keyword (`lestring16`/`bestring16`) to its `TypeKind`.
386///
387/// magic(5) defines only the explicitly-endian forms; bare `string16` is not
388/// a valid keyword.
389fn string16_family(name: &str) -> Option<TypeKind> {
390 match name {
391 "lestring16" => Some(TypeKind::String16 {
392 endian: Endianness::Little,
393 }),
394 "bestring16" => Some(TypeKind::String16 {
395 endian: Endianness::Big,
396 }),
397 _ => None,
398 }
399}
400
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::Endianness;

    // ------------------------------------------------------------
    // parse_type_keyword
    // ------------------------------------------------------------

    /// Both 8-bit spellings are recognized and leave the tail untouched.
    #[test]
    fn test_parse_type_keyword_byte_variants() {
        for input in ["byte", "ubyte"] {
            let line = format!("{input} rest");
            assert_eq!(parse_type_keyword(&line), Ok((" rest", input)));
        }
    }

    /// Every 16-bit spelling is returned verbatim.
    #[test]
    fn test_parse_type_keyword_short_variants() {
        let keywords = [
            "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort",
        ];
        for input in keywords {
            let line = format!("{input} rest");
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, input, "Failed for input: {input}");
            assert_eq!(rest, " rest", "Wrong remaining for input: {input}");
        }
    }

    /// Every 32-bit spelling is returned verbatim.
    #[test]
    fn test_parse_type_keyword_long_variants() {
        for input in ["long", "ulong", "lelong", "ulelong", "belong", "ubelong"] {
            let line = format!("{input} rest");
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, input, "Failed for: {input}");
            assert_eq!(rest, " rest");
        }
    }

    /// Every 64-bit spelling is returned verbatim.
    #[test]
    fn test_parse_type_keyword_quad_variants() {
        for input in ["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"] {
            let line = format!("{input} rest");
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, input, "Failed for: {input}");
            assert_eq!(rest, " rest");
        }
    }

    #[test]
    fn test_parse_type_keyword_string() {
        let outcome = parse_type_keyword("string rest");
        assert_eq!(outcome, Ok((" rest", "string")));
    }

    #[test]
    fn test_parse_type_keyword_unknown() {
        let outcome = parse_type_keyword("unknown rest");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_parse_type_keyword_empty() {
        let outcome = parse_type_keyword("");
        assert!(outcome.is_err());
    }

    // ------------------------------------------------------------
    // type_keyword_to_kind
    // ------------------------------------------------------------

    #[test]
    fn test_type_keyword_to_kind_byte() {
        for (keyword, signed) in [("byte", true), ("ubyte", false)] {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::Byte { signed }))
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_short_endianness() {
        let cases = [
            ("short", Endianness::Native),
            ("leshort", Endianness::Little),
            ("beshort", Endianness::Big),
        ];
        for (keyword, endian) in cases {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::Short {
                    endian,
                    signed: true
                }))
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unsigned_variants() {
        let cases = [
            (
                "ushort",
                TypeKind::Short {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
            (
                "ulong",
                TypeKind::Long {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
            (
                "uquad",
                TypeKind::Quad {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
        ];
        for (keyword, expected) in cases {
            assert_eq!(type_keyword_to_kind(keyword), Ok(Some(expected)));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_signed_defaults() {
        // libmagic types are signed by default
        let cases = [
            (
                "long",
                TypeKind::Long {
                    endian: Endianness::Native,
                    signed: true,
                },
            ),
            (
                "quad",
                TypeKind::Quad {
                    endian: Endianness::Native,
                    signed: true,
                },
            ),
        ];
        for (keyword, expected) in cases {
            assert_eq!(type_keyword_to_kind(keyword), Ok(Some(expected)));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_string() {
        let expected = TypeKind::String { max_length: None };
        assert_eq!(type_keyword_to_kind("string"), Ok(Some(expected)));
    }

    #[test]
    fn test_parse_type_keyword_pstring() {
        let outcome = parse_type_keyword("pstring rest");
        assert_eq!(outcome, Ok((" rest", "pstring")));
    }

    #[test]
    fn test_type_keyword_to_kind_pstring() {
        let expected = TypeKind::PString {
            max_length: None,
            length_width: PStringLengthWidth::OneByte,
            length_includes_itself: false,
        };
        assert_eq!(type_keyword_to_kind("pstring"), Ok(Some(expected)));
    }

    #[test]
    fn test_type_keyword_to_kind_regex_and_search_return_none() {
        // Both keywords need suffix parsing (flags/count/range) that only
        // happens in grammar/mod.rs, so the keyword-to-kind mapping
        // deliberately yields Ok(None) and callers must go through the
        // grammar layer's direct construction.
        for keyword in ["regex", "search"] {
            assert_eq!(type_keyword_to_kind(keyword), Ok(None));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unknown_returns_err() {
        // Unknown keywords yield a structured error rather than a panic.
        // `parse_type_keyword` rejects such input before this function
        // runs, but a caller can still pass an arbitrary string directly,
        // so the error must be representable.
        let err = type_keyword_to_kind("nonexistent").expect_err("unknown keyword must return Err");
        assert_eq!(err.keyword, "nonexistent");
        // The Display output names the keyword for debuggability.
        let rendered = err.to_string();
        assert!(rendered.contains("nonexistent"));
    }

    #[test]
    fn test_pstring_keyword_defaults_to_one_byte_width() {
        // The bare keyword must yield a OneByte length_width; suffix
        // parsing belongs to grammar/mod.rs, not types.rs.
        let kind = type_keyword_to_kind("pstring")
            .expect("pstring is a known keyword")
            .expect("pstring maps to Some(TypeKind)");
        let TypeKind::PString {
            max_length,
            length_width,
            ..
        } = &kind
        else {
            panic!("Expected TypeKind::PString, got {kind:?}");
        };
        assert_eq!(
            *max_length, None,
            "pstring default should have no max_length"
        );
        assert_eq!(
            *length_width,
            PStringLengthWidth::OneByte,
            "pstring default should be OneByte"
        );
    }

    #[test]
    fn test_pstring_keyword_does_not_consume_suffix() {
        // Only "pstring" is consumed; the suffix is left for the grammar.
        let (rest, keyword) = parse_type_keyword("pstring/H =value").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(
            rest, "/H =value",
            "Suffix should remain unconsumed by type keyword parser"
        );
    }

    #[test]
    fn test_pstring_keyword_boundary() {
        // Keyword sits at the very end of the input.
        let (rest, keyword) = parse_type_keyword("pstring").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(rest, "");
    }

    #[test]
    fn test_pstring_before_operator() {
        // Keyword followed by whitespace and then an operator.
        let (rest, keyword) = parse_type_keyword("pstring =hello").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(rest, " =hello");
    }

    #[test]
    fn test_parse_type_keyword_string16_variants() {
        for expected in ["lestring16", "bestring16"] {
            let line = format!("{expected} rest");
            let (rest, kw) = parse_type_keyword(&line).unwrap();
            assert_eq!(kw, expected);
            assert_eq!(rest, " rest");
        }
    }

    #[test]
    fn test_string16_keyword_to_kind() {
        let cases = [
            ("lestring16", Endianness::Little),
            ("bestring16", Endianness::Big),
        ];
        for (keyword, endian) in cases {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::String16 { endian }))
            );
        }
    }

    #[test]
    fn test_roundtrip_all_keywords() {
        // Every keyword accepted by parse_type_keyword that is complete on
        // its own must convert to Ok(Some(TypeKind)). Keywords that need
        // grammar-layer suffix parsing are checked separately below.
        let convertible_keywords = [
            // 8-bit
            "byte",
            "ubyte",
            // 16-bit
            "short",
            "ushort",
            "leshort",
            "uleshort",
            "beshort",
            "ubeshort",
            // 32-bit
            "long",
            "ulong",
            "lelong",
            "ulelong",
            "belong",
            "ubelong",
            // 64-bit
            "quad",
            "uquad",
            "lequad",
            "ulequad",
            "bequad",
            "ubequad",
            // float / double
            "float",
            "befloat",
            "lefloat",
            "double",
            "bedouble",
            "ledouble",
            // 32-bit dates
            "date",
            "ldate",
            "bedate",
            "beldate",
            "ledate",
            "leldate",
            // 64-bit dates
            "qdate",
            "qldate",
            "beqdate",
            "beqldate",
            "leqdate",
            "leqldate",
            // strings
            "pstring",
            "string",
            "lestring16",
            "bestring16",
            // meta directives without an operand
            "default",
            "clear",
            "indirect",
            "offset",
        ];
        for keyword in convertible_keywords {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert!(
                type_keyword_to_kind(parsed).is_ok_and(|o| o.is_some()),
                "{keyword} should map to Ok(Some(TypeKind))"
            );
        }

        // regex, search, name, and use are recognized by parse_type_keyword
        // but need grammar-layer suffix parsing (flags/count/range or an
        // identifier) before their TypeKind can be built. Check both halves
        // of that split.
        for keyword in ["regex", "search", "name", "use"] {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert_eq!(
                type_keyword_to_kind(parsed),
                Ok(None),
                "{keyword} should return Ok(None) from keyword-to-kind"
            );
        }
    }
}