libmagic_rs/parser/types.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Type keyword parsing for magic file types
5//!
6//! This module handles parsing and classification of magic file type keywords
7//! (byte, short, long, quad, string, etc.) into their corresponding [`TypeKind`]
8//! representations. It extracts the type keyword recognition from the grammar
9//! module to keep type-specific logic cohesive and manageable as new types are
10//! added.
11
12use nom::{IResult, Parser, branch::alt, bytes::complete::tag};
13
14use crate::parser::ast::{Endianness, MetaType, PStringLengthWidth, StringFlags, TypeKind};
15
16/// Error returned by [`type_keyword_to_kind`] when the supplied keyword is
17/// not a recognized magic type keyword.
18///
19/// This is a tight, structured error surfaced from a pure mapping function
20/// that has no access to line-number context. Callers that *do* have line
21/// context (e.g. the grammar layer wrapping a higher-level parse) can
22/// convert it into a richer [`crate::error::ParseError`] variant if needed.
23/// The struct is `#[non_exhaustive]` so future fields (e.g. suggested
24/// alternatives) can be added without a major version bump.
25///
26/// # Examples
27///
28/// ```
29/// use libmagic_rs::parser::types::{type_keyword_to_kind, UnknownTypeKeyword};
30///
31/// let err = type_keyword_to_kind("notarealtype").unwrap_err();
32/// assert_eq!(err.keyword, "notarealtype");
33/// ```
34#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
35#[non_exhaustive]
36#[error("unknown type keyword: {keyword}")]
37pub struct UnknownTypeKeyword {
38 /// The keyword string that was not recognized.
39 pub keyword: String,
40}
41
42/// Parse a type keyword from magic file input
43///
44/// Recognizes all supported type keywords and returns the matched keyword string.
45/// Type keywords are organized by bit width (64, 32, 16, 8 bits) with longest
46/// prefixes matched first within each group to avoid ambiguous partial matches.
47///
48/// # Supported Keywords
49///
50/// - 64-bit: `ubequad`, `ulequad`, `uquad`, `bequad`, `lequad`, `quad`
51/// - 32-bit: `ubelong`, `ulelong`, `ulong`, `belong`, `lelong`, `long`
52/// - 16-bit: `ubeshort`, `uleshort`, `ushort`, `beshort`, `leshort`, `short`
53/// - 8-bit: `ubyte`, `byte`
54/// - String: `pstring`, `string`
55///
56/// # Examples
57///
58/// ```
59/// use libmagic_rs::parser::types::parse_type_keyword;
60///
61/// let (rest, keyword) = parse_type_keyword("bequad rest").unwrap();
62/// assert_eq!(keyword, "bequad");
63/// assert_eq!(rest, " rest");
64/// ```
65///
66/// # Errors
67///
68/// Returns a nom parsing error if the input doesn't start with a known type keyword.
69pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
70 alt((
71 // 64-bit types (6 branches)
72 alt((
73 tag("ubequad"),
74 tag("ulequad"),
75 tag("uquad"),
76 tag("bequad"),
77 tag("lequad"),
78 tag("quad"),
79 )),
80 // 32-bit types (6 branches)
81 alt((
82 tag("ubelong"),
83 tag("ulelong"),
84 tag("ulong"),
85 tag("belong"),
86 tag("lelong"),
87 tag("long"),
88 )),
89 // 16-bit types (6 branches)
90 alt((
91 tag("ubeshort"),
92 tag("uleshort"),
93 tag("ushort"),
94 tag("beshort"),
95 tag("leshort"),
96 tag("short"),
97 )),
98 // 8-bit types (2 branches)
99 alt((tag("ubyte"), tag("byte"))),
100 // Float/double types (6 branches)
101 alt((
102 tag("bedouble"),
103 tag("ledouble"),
104 tag("double"),
105 tag("befloat"),
106 tag("lefloat"),
107 tag("float"),
108 )),
109 // Date types -- 32-bit (date) and 64-bit (qdate)
110 alt((
111 tag("beqldate"),
112 tag("leqldate"),
113 tag("beqdate"),
114 tag("leqdate"),
115 tag("qldate"),
116 tag("qdate"),
117 tag("beldate"),
118 tag("leldate"),
119 tag("bedate"),
120 tag("ldate"),
121 tag("ledate"),
122 tag("date"),
123 )),
124 // String types (and regex/search, which share the string-type family).
125 //
126 // `lestring16`/`bestring16` are listed before `string` because nom
127 // tries each tag in order and we need the longer keyword to win when
128 // both could plausibly match -- in practice the prefixes (`lestring`,
129 // `bestring`) don't collide with anything else, but ordering by
130 // length is the safer pattern as more keywords are added.
131 alt((
132 tag("lestring16"),
133 tag("bestring16"),
134 tag("pstring"),
135 tag("search"),
136 tag("regex"),
137 tag("string"),
138 )),
139 // Meta / control-flow directives. `indirect` is listed first so the
140 // longest match is tried before `default`, `clear`, `name`, `use`;
141 // none of these collide with other supported keywords.
142 //
143 // `offset` is recognized here so the parser can accept magic files
144 // that use it (e.g. `searchbug.magic`). It maps to
145 // `TypeKind::Meta(MetaType::Offset)` and is fully evaluated by the
146 // engine: the resolved offset is emitted as `Value::Uint(position)`
147 // and participates in printf-style format substitution.
148 alt((
149 tag("indirect"),
150 tag("default"),
151 tag("offset"),
152 tag("clear"),
153 tag("name"),
154 tag("use"),
155 )),
156 ))
157 .parse(input)
158}
159
160/// Convert a type keyword string to its corresponding [`TypeKind`]
161///
162/// Maps a previously parsed type keyword (from [`parse_type_keyword`]) to the
163/// appropriate `TypeKind` variant with correct endianness and signedness settings.
164///
165/// # Conventions
166///
167/// - Unprefixed types are signed (libmagic default): `byte`, `short`, `long`, `quad`
168/// - `u` prefix indicates unsigned: `ubyte`, `ushort`, `ulong`, `uquad`
169/// - `be` prefix indicates big-endian: `beshort`, `belong`, `bequad`
170/// - `le` prefix indicates little-endian: `leshort`, `lelong`, `lequad`
171/// - No endian prefix means native endianness
172///
173/// Returns `Ok(None)` for `regex` and `search`, which cannot be constructed
174/// from the keyword alone -- they require suffix parsing (flags/count
175/// for regex, mandatory `NonZeroUsize` range for search) that only
176/// happens in `parser::grammar::parse_type_and_operator`. Callers that
177/// need a complete `TypeKind::Regex` or `TypeKind::Search` must build
178/// it directly in the grammar layer, not via this function.
179///
180/// # Examples
181///
182/// ```
183/// use libmagic_rs::parser::types::type_keyword_to_kind;
184/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
185///
186/// assert_eq!(type_keyword_to_kind("byte"), Ok(Some(TypeKind::Byte { signed: true })));
187/// assert_eq!(type_keyword_to_kind("ubyte"), Ok(Some(TypeKind::Byte { signed: false })));
188/// assert_eq!(
189/// type_keyword_to_kind("beshort"),
190/// Ok(Some(TypeKind::Short { endian: Endianness::Big, signed: true }))
191/// );
192/// // regex/search require suffix parsing, so the keyword alone returns Ok(None).
193/// assert_eq!(type_keyword_to_kind("regex"), Ok(None));
194/// assert_eq!(type_keyword_to_kind("search"), Ok(None));
195/// // Unknown keywords return a structured error.
196/// assert!(type_keyword_to_kind("bogus").is_err());
197/// ```
198///
199/// # Returns
200///
201/// * `Ok(Some(TypeKind))` for fully-specified keywords (byte, short, long,
202/// quad, float, double, date, qdate, string, pstring and all their
203/// variants).
204/// * `Ok(None)` for suffix-required keywords (`regex`, `search`) which
205/// cannot be converted from the keyword alone -- the grammar layer
206/// builds their `TypeKind` directly after parsing the suffix.
207/// * `Err(UnknownTypeKeyword)` if `type_name` is not a recognized
208/// keyword. Under normal control flow this only happens when a caller
209/// bypasses [`parse_type_keyword`] (which is the only supported way
210/// to produce valid input for this function).
211///
212/// # Errors
213///
214/// Returns [`UnknownTypeKeyword`] when `type_name` is not one of the
215/// keywords recognized by [`parse_type_keyword`]. This replaces a prior
216/// `unreachable!` panic; library code must never panic on untrusted
217/// input, and the structured error lets callers translate the failure
218/// into their own error type (e.g. a nom parse error or a richer
219/// `ParseError::InvalidType`).
220pub fn type_keyword_to_kind(type_name: &str) -> Result<Option<TypeKind>, UnknownTypeKeyword> {
221 // `regex` and `search` cannot be constructed from the keyword alone.
222 // They require suffix parsing (flags/count for regex, mandatory
223 // `NonZeroUsize` range for search) which only happens in
224 // `parse_type_and_operator` in grammar/mod.rs. Returning `None`
225 // here makes the "keyword alone isn't enough" invariant
226 // type-enforced instead of relying on a placeholder that the
227 // grammar layer is expected to overwrite.
228 //
229 // `name` and `use` also return `Ok(None)` because their identifier
230 // suffix is parsed in the grammar layer, following the same
231 // "keyword alone isn't enough" pattern.
232 if matches!(type_name, "regex" | "search" | "name" | "use") {
233 return Ok(None);
234 }
235
236 // Meta / control-flow directives with no trailing operand are fully
237 // specified by the keyword alone. `offset` maps to
238 // `MetaType::Offset` which the engine evaluates by emitting the
239 // resolved file position as `Value::Uint` for format substitution.
240 match type_name {
241 "default" => return Ok(Some(TypeKind::Meta(MetaType::Default))),
242 "clear" => return Ok(Some(TypeKind::Meta(MetaType::Clear))),
243 "indirect" => return Ok(Some(TypeKind::Meta(MetaType::Indirect))),
244 "offset" => return Ok(Some(TypeKind::Meta(MetaType::Offset))),
245 _ => {}
246 }
247
248 if let Some(kind) = byte_family(type_name)
249 .or_else(|| short_family(type_name))
250 .or_else(|| long_family(type_name))
251 .or_else(|| quad_family(type_name))
252 .or_else(|| float_family(type_name))
253 .or_else(|| double_family(type_name))
254 .or_else(|| date_family(type_name))
255 .or_else(|| qdate_family(type_name))
256 .or_else(|| string_family(type_name))
257 .or_else(|| string16_family(type_name))
258 {
259 return Ok(Some(kind));
260 }
261
262 Err(UnknownTypeKeyword {
263 keyword: type_name.to_string(),
264 })
265}
266
267/// Map a byte-family keyword (`byte`, `ubyte`) to its `TypeKind`.
268fn byte_family(name: &str) -> Option<TypeKind> {
269 match name {
270 "byte" => Some(TypeKind::Byte { signed: true }),
271 "ubyte" => Some(TypeKind::Byte { signed: false }),
272 _ => None,
273 }
274}
275
276/// Map a short-family keyword (`short`/`ushort`/`beshort`/...) to its `TypeKind`.
277fn short_family(name: &str) -> Option<TypeKind> {
278 let (endian, signed) = match name {
279 "short" => (Endianness::Native, true),
280 "ushort" => (Endianness::Native, false),
281 "leshort" => (Endianness::Little, true),
282 "uleshort" => (Endianness::Little, false),
283 "beshort" => (Endianness::Big, true),
284 "ubeshort" => (Endianness::Big, false),
285 _ => return None,
286 };
287 Some(TypeKind::Short { endian, signed })
288}
289
290/// Map a long-family keyword (`long`/`ulong`/`belong`/...) to its `TypeKind`.
291fn long_family(name: &str) -> Option<TypeKind> {
292 let (endian, signed) = match name {
293 "long" => (Endianness::Native, true),
294 "ulong" => (Endianness::Native, false),
295 "lelong" => (Endianness::Little, true),
296 "ulelong" => (Endianness::Little, false),
297 "belong" => (Endianness::Big, true),
298 "ubelong" => (Endianness::Big, false),
299 _ => return None,
300 };
301 Some(TypeKind::Long { endian, signed })
302}
303
304/// Map a quad-family keyword (`quad`/`uquad`/`bequad`/...) to its `TypeKind`.
305fn quad_family(name: &str) -> Option<TypeKind> {
306 let (endian, signed) = match name {
307 "quad" => (Endianness::Native, true),
308 "uquad" => (Endianness::Native, false),
309 "lequad" => (Endianness::Little, true),
310 "ulequad" => (Endianness::Little, false),
311 "bequad" => (Endianness::Big, true),
312 "ubequad" => (Endianness::Big, false),
313 _ => return None,
314 };
315 Some(TypeKind::Quad { endian, signed })
316}
317
318/// Map a float-family keyword (`float`/`befloat`/`lefloat`) to its `TypeKind`.
319fn float_family(name: &str) -> Option<TypeKind> {
320 let endian = match name {
321 "float" => Endianness::Native,
322 "befloat" => Endianness::Big,
323 "lefloat" => Endianness::Little,
324 _ => return None,
325 };
326 Some(TypeKind::Float { endian })
327}
328
329/// Map a double-family keyword (`double`/`bedouble`/`ledouble`) to its `TypeKind`.
330fn double_family(name: &str) -> Option<TypeKind> {
331 let endian = match name {
332 "double" => Endianness::Native,
333 "bedouble" => Endianness::Big,
334 "ledouble" => Endianness::Little,
335 _ => return None,
336 };
337 Some(TypeKind::Double { endian })
338}
339
340/// Map a 32-bit date keyword (`date`/`ldate`/`bedate`/...) to its `TypeKind`.
341fn date_family(name: &str) -> Option<TypeKind> {
342 let (endian, utc) = match name {
343 "date" => (Endianness::Native, true),
344 "ldate" => (Endianness::Native, false),
345 "bedate" => (Endianness::Big, true),
346 "beldate" => (Endianness::Big, false),
347 "ledate" => (Endianness::Little, true),
348 "leldate" => (Endianness::Little, false),
349 _ => return None,
350 };
351 Some(TypeKind::Date { endian, utc })
352}
353
354/// Map a 64-bit date keyword (`qdate`/`qldate`/`beqdate`/...) to its `TypeKind`.
355fn qdate_family(name: &str) -> Option<TypeKind> {
356 let (endian, utc) = match name {
357 "qdate" => (Endianness::Native, true),
358 "qldate" => (Endianness::Native, false),
359 "beqdate" => (Endianness::Big, true),
360 "beqldate" => (Endianness::Big, false),
361 "leqdate" => (Endianness::Little, true),
362 "leqldate" => (Endianness::Little, false),
363 _ => return None,
364 };
365 Some(TypeKind::QDate { endian, utc })
366}
367
368/// Map a string-family keyword (`string`, `pstring`) to its `TypeKind`.
369///
370/// `pstring` defaults to a 1-byte length prefix; the grammar layer
371/// overwrites `length_width` / `length_includes_itself` from any
372/// trailing `/B`/`/H`/`/h`/`/L`/`/l`/`/J` suffix.
373fn string_family(name: &str) -> Option<TypeKind> {
374 match name {
375 "string" => Some(TypeKind::String {
376 max_length: None,
377 flags: StringFlags::default(),
378 }),
379 "pstring" => Some(TypeKind::PString {
380 max_length: None,
381 length_width: PStringLengthWidth::OneByte,
382 length_includes_itself: false,
383 }),
384 _ => None,
385 }
386}
387
388/// Map a UCS-2 string keyword (`lestring16`/`bestring16`) to its `TypeKind`.
389///
390/// magic(5) defines only the explicitly-endian forms; bare `string16` is not
391/// a valid keyword.
392fn string16_family(name: &str) -> Option<TypeKind> {
393 match name {
394 "lestring16" => Some(TypeKind::String16 {
395 endian: Endianness::Little,
396 }),
397 "bestring16" => Some(TypeKind::String16 {
398 endian: Endianness::Big,
399 }),
400 _ => None,
401 }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::Endianness;

    /// Keywords that `type_keyword_to_kind` turns into a complete `TypeKind`.
    const CONVERTIBLE_KEYWORDS: &[&str] = &[
        "byte",
        "ubyte",
        "short",
        "ushort",
        "leshort",
        "uleshort",
        "beshort",
        "ubeshort",
        "long",
        "ulong",
        "lelong",
        "ulelong",
        "belong",
        "ubelong",
        "quad",
        "uquad",
        "lequad",
        "ulequad",
        "bequad",
        "ubequad",
        "float",
        "befloat",
        "lefloat",
        "double",
        "bedouble",
        "ledouble",
        "date",
        "ldate",
        "bedate",
        "beldate",
        "ledate",
        "leldate",
        "qdate",
        "qldate",
        "beqdate",
        "beqldate",
        "leqdate",
        "leqldate",
        "pstring",
        "string",
        "lestring16",
        "bestring16",
        "default",
        "clear",
        "indirect",
        "offset",
    ];

    /// Keywords the parser recognizes but whose `TypeKind` is only built by
    /// the grammar layer after it has parsed a suffix or identifier.
    const SUFFIX_REQUIRED_KEYWORDS: &[&str] = &["regex", "search", "name", "use"];

    /// Parse `"<input> rest"` and check that exactly `input` was consumed.
    fn assert_parses_with_trailing_text(input: &str) {
        let line = format!("{input} rest");
        let (rest, keyword) = parse_type_keyword(&line).unwrap();
        assert_eq!(keyword, input, "Failed for: {input}");
        assert_eq!(rest, " rest");
    }

    /// Parse a bare keyword, check nothing is left over, and hand back what
    /// the parser matched.
    fn parse_entire_keyword(keyword: &str) -> &str {
        let (rest, parsed) = parse_type_keyword(keyword).unwrap();
        assert_eq!(rest, "", "Keyword {keyword} should consume all input");
        parsed
    }

    // ------------------------------------------------------------
    // parse_type_keyword
    // ------------------------------------------------------------

    #[test]
    fn test_parse_type_keyword_byte_variants() {
        for keyword in ["byte", "ubyte"] {
            let line = format!("{keyword} rest");
            assert_eq!(parse_type_keyword(&line), Ok((" rest", keyword)));
        }
    }

    #[test]
    fn test_parse_type_keyword_short_variants() {
        let keywords = [
            "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort",
        ];
        for input in keywords {
            let line = [input, " rest"].concat();
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, input, "Failed for input: {input}");
            assert_eq!(rest, " rest", "Wrong remaining for input: {input}");
        }
    }

    #[test]
    fn test_parse_type_keyword_long_variants() {
        ["long", "ulong", "lelong", "ulelong", "belong", "ubelong"]
            .into_iter()
            .for_each(assert_parses_with_trailing_text);
    }

    #[test]
    fn test_parse_type_keyword_quad_variants() {
        ["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"]
            .into_iter()
            .for_each(assert_parses_with_trailing_text);
    }

    #[test]
    fn test_parse_type_keyword_string() {
        let outcome = parse_type_keyword("string rest");
        assert_eq!(outcome, Ok((" rest", "string")));
    }

    #[test]
    fn test_parse_type_keyword_unknown() {
        assert!(matches!(parse_type_keyword("unknown rest"), Err(_)));
    }

    #[test]
    fn test_parse_type_keyword_empty() {
        assert!(matches!(parse_type_keyword(""), Err(_)));
    }

    // ------------------------------------------------------------
    // type_keyword_to_kind
    // ------------------------------------------------------------

    #[test]
    fn test_type_keyword_to_kind_byte() {
        for (keyword, signed) in [("byte", true), ("ubyte", false)] {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::Byte { signed }))
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_short_endianness() {
        let expectations = [
            ("short", Endianness::Native),
            ("leshort", Endianness::Little),
            ("beshort", Endianness::Big),
        ];
        for (keyword, endian) in expectations {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::Short {
                    endian,
                    signed: true
                }))
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unsigned_variants() {
        let expectations = [
            (
                "ushort",
                TypeKind::Short {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
            (
                "ulong",
                TypeKind::Long {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
            (
                "uquad",
                TypeKind::Quad {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
        ];
        for (keyword, kind) in expectations {
            assert_eq!(type_keyword_to_kind(keyword), Ok(Some(kind)));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_signed_defaults() {
        // Unprefixed libmagic integer types default to signed.
        let expectations = [
            (
                "long",
                TypeKind::Long {
                    endian: Endianness::Native,
                    signed: true,
                },
            ),
            (
                "quad",
                TypeKind::Quad {
                    endian: Endianness::Native,
                    signed: true,
                },
            ),
        ];
        for (keyword, kind) in expectations {
            assert_eq!(type_keyword_to_kind(keyword), Ok(Some(kind)));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_string() {
        let expected = TypeKind::String {
            max_length: None,
            flags: StringFlags::default(),
        };
        assert_eq!(type_keyword_to_kind("string"), Ok(Some(expected)));
    }

    #[test]
    fn test_parse_type_keyword_pstring() {
        let outcome = parse_type_keyword("pstring rest");
        assert_eq!(outcome, Ok((" rest", "pstring")));
    }

    #[test]
    fn test_type_keyword_to_kind_pstring() {
        let expected = TypeKind::PString {
            max_length: None,
            length_width: PStringLengthWidth::OneByte,
            length_includes_itself: false,
        };
        assert_eq!(type_keyword_to_kind("pstring"), Ok(Some(expected)));
    }

    #[test]
    fn test_type_keyword_to_kind_regex_and_search_return_none() {
        // Both keywords need a suffix (flags/count/range) that is parsed in
        // grammar/mod.rs, so the keyword-only mapping deliberately answers
        // Ok(None) and leaves construction to the grammar layer.
        for keyword in ["regex", "search"] {
            assert_eq!(type_keyword_to_kind(keyword), Ok(None));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unknown_returns_err() {
        // `parse_type_keyword` rejects unknown keywords before this function
        // runs, but a caller may pass an arbitrary string directly. That must
        // surface as a structured error, never a panic.
        let err = type_keyword_to_kind("nonexistent").expect_err("unknown keyword must return Err");
        // The Display output names the keyword to aid debugging.
        let rendered = err.to_string();
        assert!(rendered.contains("nonexistent"));
        assert_eq!(err.keyword, "nonexistent");
    }

    #[test]
    fn test_pstring_keyword_defaults_to_one_byte_width() {
        // Without a suffix (which grammar/mod.rs handles, not types.rs) the
        // keyword must yield the OneByte length prefix.
        let kind = type_keyword_to_kind("pstring")
            .expect("pstring is a known keyword")
            .expect("pstring maps to Some(TypeKind)");
        let TypeKind::PString {
            max_length,
            length_width,
            ..
        } = &kind
        else {
            panic!("Expected TypeKind::PString, got {kind:?}");
        };
        assert_eq!(
            *max_length, None,
            "pstring default should have no max_length"
        );
        assert_eq!(
            *length_width,
            PStringLengthWidth::OneByte,
            "pstring default should be OneByte"
        );
    }

    #[test]
    fn test_pstring_keyword_does_not_consume_suffix() {
        // Only "pstring" may be consumed; the "/H" suffix belongs to the grammar.
        let (remaining, matched) = parse_type_keyword("pstring/H =value").unwrap();
        assert_eq!(matched, "pstring");
        assert_eq!(
            remaining, "/H =value",
            "Suffix should remain unconsumed by type keyword parser"
        );
    }

    #[test]
    fn test_pstring_keyword_boundary() {
        // The keyword is the entire input: nothing may be left over.
        let (remaining, matched) = parse_type_keyword("pstring").unwrap();
        assert_eq!(matched, "pstring");
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_pstring_before_operator() {
        // Keyword, whitespace, then an operator and value.
        let (remaining, matched) = parse_type_keyword("pstring =hello").unwrap();
        assert_eq!(matched, "pstring");
        assert_eq!(remaining, " =hello");
    }

    #[test]
    fn test_parse_type_keyword_string16_variants() {
        for keyword in ["lestring16", "bestring16"] {
            let line = format!("{keyword} rest");
            let (rest, kw) = parse_type_keyword(&line).unwrap();
            assert_eq!(kw, keyword);
            assert_eq!(rest, " rest");
        }
    }

    #[test]
    fn test_string16_keyword_to_kind() {
        let expectations = [
            ("lestring16", Endianness::Little),
            ("bestring16", Endianness::Big),
        ];
        for (keyword, endian) in expectations {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::String16 { endian }))
            );
        }
    }

    #[test]
    fn test_roundtrip_all_keywords() {
        // Every keyword the parser accepts must be understood by
        // `type_keyword_to_kind`. Most map to a complete TypeKind.
        for keyword in CONVERTIBLE_KEYWORDS.iter().copied() {
            let parsed = parse_entire_keyword(keyword);
            assert!(
                matches!(type_keyword_to_kind(parsed), Ok(Some(_))),
                "{keyword} should map to Ok(Some(TypeKind))"
            );
        }
        // regex, search, name and use are still recognized by the parser, but
        // they need grammar-layer suffix parsing (flags/count/range or an
        // identifier), so the mapping side must answer Ok(None). Check both
        // halves of that split.
        for keyword in SUFFIX_REQUIRED_KEYWORDS.iter().copied() {
            let parsed = parse_entire_keyword(keyword);
            assert_eq!(
                type_keyword_to_kind(parsed),
                Ok(None),
                "{keyword} should return Ok(None) from keyword-to-kind"
            );
        }
    }
}