Skip to main content

libmagic_rs/parser/
types.rs

// Copyright (c) 2025-2026 the libmagic-rs contributors
// SPDX-License-Identifier: Apache-2.0

//! Type keyword parsing for magic file types
//!
//! This module handles parsing and classification of magic file type keywords
//! (byte, short, long, quad, string, etc.) into their corresponding [`TypeKind`]
//! representations. It extracts the type keyword recognition from the grammar
//! module to keep type-specific logic cohesive and manageable as new types are
//! added.
11
12use nom::{IResult, Parser, branch::alt, bytes::complete::tag};
13
14use crate::parser::ast::{Endianness, MetaType, PStringLengthWidth, StringFlags, TypeKind};
15
16/// Error returned by [`type_keyword_to_kind`] when the supplied keyword is
17/// not a recognized magic type keyword.
18///
19/// This is a tight, structured error surfaced from a pure mapping function
20/// that has no access to line-number context. Callers that *do* have line
21/// context (e.g. the grammar layer wrapping a higher-level parse) can
22/// convert it into a richer [`crate::error::ParseError`] variant if needed.
23/// The struct is `#[non_exhaustive]` so future fields (e.g. suggested
24/// alternatives) can be added without a major version bump.
25///
26/// # Examples
27///
28/// ```
29/// use libmagic_rs::parser::types::{type_keyword_to_kind, UnknownTypeKeyword};
30///
31/// let err = type_keyword_to_kind("notarealtype").unwrap_err();
32/// assert_eq!(err.keyword, "notarealtype");
33/// ```
34#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
35#[non_exhaustive]
36#[error("unknown type keyword: {keyword}")]
37pub struct UnknownTypeKeyword {
38    /// The keyword string that was not recognized.
39    pub keyword: String,
40}
41
42/// Parse a type keyword from magic file input
43///
44/// Recognizes all supported type keywords and returns the matched keyword string.
45/// Type keywords are organized by bit width (64, 32, 16, 8 bits) with longest
46/// prefixes matched first within each group to avoid ambiguous partial matches.
47///
48/// # Supported Keywords
49///
50/// - 64-bit: `ubequad`, `ulequad`, `uquad`, `bequad`, `lequad`, `quad`
51/// - 32-bit: `ubelong`, `ulelong`, `ulong`, `belong`, `lelong`, `long`
52/// - 16-bit: `ubeshort`, `uleshort`, `ushort`, `beshort`, `leshort`, `short`
53/// - 8-bit: `ubyte`, `byte`
54/// - String: `pstring`, `string`
55///
56/// # Examples
57///
58/// ```
59/// use libmagic_rs::parser::types::parse_type_keyword;
60///
61/// let (rest, keyword) = parse_type_keyword("bequad rest").unwrap();
62/// assert_eq!(keyword, "bequad");
63/// assert_eq!(rest, " rest");
64/// ```
65///
66/// # Errors
67///
68/// Returns a nom parsing error if the input doesn't start with a known type keyword.
69pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
70    alt((
71        // 64-bit types (6 branches)
72        alt((
73            tag("ubequad"),
74            tag("ulequad"),
75            tag("uquad"),
76            tag("bequad"),
77            tag("lequad"),
78            tag("quad"),
79        )),
80        // 32-bit types (6 branches)
81        alt((
82            tag("ubelong"),
83            tag("ulelong"),
84            tag("ulong"),
85            tag("belong"),
86            tag("lelong"),
87            tag("long"),
88        )),
89        // 16-bit types (6 branches)
90        alt((
91            tag("ubeshort"),
92            tag("uleshort"),
93            tag("ushort"),
94            tag("beshort"),
95            tag("leshort"),
96            tag("short"),
97        )),
98        // 8-bit types (2 branches)
99        alt((tag("ubyte"), tag("byte"))),
100        // Float/double types (6 branches)
101        alt((
102            tag("bedouble"),
103            tag("ledouble"),
104            tag("double"),
105            tag("befloat"),
106            tag("lefloat"),
107            tag("float"),
108        )),
109        // Date types -- 32-bit (date) and 64-bit (qdate)
110        alt((
111            tag("beqldate"),
112            tag("leqldate"),
113            tag("beqdate"),
114            tag("leqdate"),
115            tag("qldate"),
116            tag("qdate"),
117            tag("beldate"),
118            tag("leldate"),
119            tag("bedate"),
120            tag("ldate"),
121            tag("ledate"),
122            tag("date"),
123        )),
124        // String types (and regex/search, which share the string-type family).
125        //
126        // `lestring16`/`bestring16` are listed before `string` because nom
127        // tries each tag in order and we need the longer keyword to win when
128        // both could plausibly match -- in practice the prefixes (`lestring`,
129        // `bestring`) don't collide with anything else, but ordering by
130        // length is the safer pattern as more keywords are added.
131        alt((
132            tag("lestring16"),
133            tag("bestring16"),
134            tag("pstring"),
135            tag("search"),
136            tag("regex"),
137            tag("string"),
138        )),
139        // Meta / control-flow directives. `indirect` is listed first so the
140        // longest match is tried before `default`, `clear`, `name`, `use`;
141        // none of these collide with other supported keywords.
142        //
143        // `offset` is recognized here so the parser can accept magic files
144        // that use it (e.g. `searchbug.magic`). It maps to
145        // `TypeKind::Meta(MetaType::Offset)` and is fully evaluated by the
146        // engine: the resolved offset is emitted as `Value::Uint(position)`
147        // and participates in printf-style format substitution.
148        alt((
149            tag("indirect"),
150            tag("default"),
151            tag("offset"),
152            tag("clear"),
153            tag("name"),
154            tag("use"),
155        )),
156    ))
157    .parse(input)
158}
159
160/// Convert a type keyword string to its corresponding [`TypeKind`]
161///
162/// Maps a previously parsed type keyword (from [`parse_type_keyword`]) to the
163/// appropriate `TypeKind` variant with correct endianness and signedness settings.
164///
165/// # Conventions
166///
167/// - Unprefixed types are signed (libmagic default): `byte`, `short`, `long`, `quad`
168/// - `u` prefix indicates unsigned: `ubyte`, `ushort`, `ulong`, `uquad`
169/// - `be` prefix indicates big-endian: `beshort`, `belong`, `bequad`
170/// - `le` prefix indicates little-endian: `leshort`, `lelong`, `lequad`
171/// - No endian prefix means native endianness
172///
173/// Returns `Ok(None)` for `regex` and `search`, which cannot be constructed
174/// from the keyword alone -- they require suffix parsing (flags/count
175/// for regex, mandatory `NonZeroUsize` range for search) that only
176/// happens in `parser::grammar::parse_type_and_operator`. Callers that
177/// need a complete `TypeKind::Regex` or `TypeKind::Search` must build
178/// it directly in the grammar layer, not via this function.
179///
180/// # Examples
181///
182/// ```
183/// use libmagic_rs::parser::types::type_keyword_to_kind;
184/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
185///
186/// assert_eq!(type_keyword_to_kind("byte"), Ok(Some(TypeKind::Byte { signed: true })));
187/// assert_eq!(type_keyword_to_kind("ubyte"), Ok(Some(TypeKind::Byte { signed: false })));
188/// assert_eq!(
189///     type_keyword_to_kind("beshort"),
190///     Ok(Some(TypeKind::Short { endian: Endianness::Big, signed: true }))
191/// );
192/// // regex/search require suffix parsing, so the keyword alone returns Ok(None).
193/// assert_eq!(type_keyword_to_kind("regex"), Ok(None));
194/// assert_eq!(type_keyword_to_kind("search"), Ok(None));
195/// // Unknown keywords return a structured error.
196/// assert!(type_keyword_to_kind("bogus").is_err());
197/// ```
198///
199/// # Returns
200///
201/// * `Ok(Some(TypeKind))` for fully-specified keywords (byte, short, long,
202///   quad, float, double, date, qdate, string, pstring and all their
203///   variants).
204/// * `Ok(None)` for suffix-required keywords (`regex`, `search`) which
205///   cannot be converted from the keyword alone -- the grammar layer
206///   builds their `TypeKind` directly after parsing the suffix.
207/// * `Err(UnknownTypeKeyword)` if `type_name` is not a recognized
208///   keyword. Under normal control flow this only happens when a caller
209///   bypasses [`parse_type_keyword`] (which is the only supported way
210///   to produce valid input for this function).
211///
212/// # Errors
213///
214/// Returns [`UnknownTypeKeyword`] when `type_name` is not one of the
215/// keywords recognized by [`parse_type_keyword`]. This replaces a prior
216/// `unreachable!` panic; library code must never panic on untrusted
217/// input, and the structured error lets callers translate the failure
218/// into their own error type (e.g. a nom parse error or a richer
219/// `ParseError::InvalidType`).
220pub fn type_keyword_to_kind(type_name: &str) -> Result<Option<TypeKind>, UnknownTypeKeyword> {
221    // `regex` and `search` cannot be constructed from the keyword alone.
222    // They require suffix parsing (flags/count for regex, mandatory
223    // `NonZeroUsize` range for search) which only happens in
224    // `parse_type_and_operator` in grammar/mod.rs. Returning `None`
225    // here makes the "keyword alone isn't enough" invariant
226    // type-enforced instead of relying on a placeholder that the
227    // grammar layer is expected to overwrite.
228    //
229    // `name` and `use` also return `Ok(None)` because their identifier
230    // suffix is parsed in the grammar layer, following the same
231    // "keyword alone isn't enough" pattern.
232    if matches!(type_name, "regex" | "search" | "name" | "use") {
233        return Ok(None);
234    }
235
236    // Meta / control-flow directives with no trailing operand are fully
237    // specified by the keyword alone. `offset` maps to
238    // `MetaType::Offset` which the engine evaluates by emitting the
239    // resolved file position as `Value::Uint` for format substitution.
240    match type_name {
241        "default" => return Ok(Some(TypeKind::Meta(MetaType::Default))),
242        "clear" => return Ok(Some(TypeKind::Meta(MetaType::Clear))),
243        "indirect" => return Ok(Some(TypeKind::Meta(MetaType::Indirect))),
244        "offset" => return Ok(Some(TypeKind::Meta(MetaType::Offset))),
245        _ => {}
246    }
247
248    if let Some(kind) = byte_family(type_name)
249        .or_else(|| short_family(type_name))
250        .or_else(|| long_family(type_name))
251        .or_else(|| quad_family(type_name))
252        .or_else(|| float_family(type_name))
253        .or_else(|| double_family(type_name))
254        .or_else(|| date_family(type_name))
255        .or_else(|| qdate_family(type_name))
256        .or_else(|| string_family(type_name))
257        .or_else(|| string16_family(type_name))
258    {
259        return Ok(Some(kind));
260    }
261
262    Err(UnknownTypeKeyword {
263        keyword: type_name.to_string(),
264    })
265}
266
267/// Map a byte-family keyword (`byte`, `ubyte`) to its `TypeKind`.
268fn byte_family(name: &str) -> Option<TypeKind> {
269    match name {
270        "byte" => Some(TypeKind::Byte { signed: true }),
271        "ubyte" => Some(TypeKind::Byte { signed: false }),
272        _ => None,
273    }
274}
275
276/// Map a short-family keyword (`short`/`ushort`/`beshort`/...) to its `TypeKind`.
277fn short_family(name: &str) -> Option<TypeKind> {
278    let (endian, signed) = match name {
279        "short" => (Endianness::Native, true),
280        "ushort" => (Endianness::Native, false),
281        "leshort" => (Endianness::Little, true),
282        "uleshort" => (Endianness::Little, false),
283        "beshort" => (Endianness::Big, true),
284        "ubeshort" => (Endianness::Big, false),
285        _ => return None,
286    };
287    Some(TypeKind::Short { endian, signed })
288}
289
290/// Map a long-family keyword (`long`/`ulong`/`belong`/...) to its `TypeKind`.
291fn long_family(name: &str) -> Option<TypeKind> {
292    let (endian, signed) = match name {
293        "long" => (Endianness::Native, true),
294        "ulong" => (Endianness::Native, false),
295        "lelong" => (Endianness::Little, true),
296        "ulelong" => (Endianness::Little, false),
297        "belong" => (Endianness::Big, true),
298        "ubelong" => (Endianness::Big, false),
299        _ => return None,
300    };
301    Some(TypeKind::Long { endian, signed })
302}
303
304/// Map a quad-family keyword (`quad`/`uquad`/`bequad`/...) to its `TypeKind`.
305fn quad_family(name: &str) -> Option<TypeKind> {
306    let (endian, signed) = match name {
307        "quad" => (Endianness::Native, true),
308        "uquad" => (Endianness::Native, false),
309        "lequad" => (Endianness::Little, true),
310        "ulequad" => (Endianness::Little, false),
311        "bequad" => (Endianness::Big, true),
312        "ubequad" => (Endianness::Big, false),
313        _ => return None,
314    };
315    Some(TypeKind::Quad { endian, signed })
316}
317
318/// Map a float-family keyword (`float`/`befloat`/`lefloat`) to its `TypeKind`.
319fn float_family(name: &str) -> Option<TypeKind> {
320    let endian = match name {
321        "float" => Endianness::Native,
322        "befloat" => Endianness::Big,
323        "lefloat" => Endianness::Little,
324        _ => return None,
325    };
326    Some(TypeKind::Float { endian })
327}
328
329/// Map a double-family keyword (`double`/`bedouble`/`ledouble`) to its `TypeKind`.
330fn double_family(name: &str) -> Option<TypeKind> {
331    let endian = match name {
332        "double" => Endianness::Native,
333        "bedouble" => Endianness::Big,
334        "ledouble" => Endianness::Little,
335        _ => return None,
336    };
337    Some(TypeKind::Double { endian })
338}
339
340/// Map a 32-bit date keyword (`date`/`ldate`/`bedate`/...) to its `TypeKind`.
341fn date_family(name: &str) -> Option<TypeKind> {
342    let (endian, utc) = match name {
343        "date" => (Endianness::Native, true),
344        "ldate" => (Endianness::Native, false),
345        "bedate" => (Endianness::Big, true),
346        "beldate" => (Endianness::Big, false),
347        "ledate" => (Endianness::Little, true),
348        "leldate" => (Endianness::Little, false),
349        _ => return None,
350    };
351    Some(TypeKind::Date { endian, utc })
352}
353
354/// Map a 64-bit date keyword (`qdate`/`qldate`/`beqdate`/...) to its `TypeKind`.
355fn qdate_family(name: &str) -> Option<TypeKind> {
356    let (endian, utc) = match name {
357        "qdate" => (Endianness::Native, true),
358        "qldate" => (Endianness::Native, false),
359        "beqdate" => (Endianness::Big, true),
360        "beqldate" => (Endianness::Big, false),
361        "leqdate" => (Endianness::Little, true),
362        "leqldate" => (Endianness::Little, false),
363        _ => return None,
364    };
365    Some(TypeKind::QDate { endian, utc })
366}
367
368/// Map a string-family keyword (`string`, `pstring`) to its `TypeKind`.
369///
370/// `pstring` defaults to a 1-byte length prefix; the grammar layer
371/// overwrites `length_width` / `length_includes_itself` from any
372/// trailing `/B`/`/H`/`/h`/`/L`/`/l`/`/J` suffix.
373fn string_family(name: &str) -> Option<TypeKind> {
374    match name {
375        "string" => Some(TypeKind::String {
376            max_length: None,
377            flags: StringFlags::default(),
378        }),
379        "pstring" => Some(TypeKind::PString {
380            max_length: None,
381            length_width: PStringLengthWidth::OneByte,
382            length_includes_itself: false,
383        }),
384        _ => None,
385    }
386}
387
388/// Map a UCS-2 string keyword (`lestring16`/`bestring16`) to its `TypeKind`.
389///
390/// magic(5) defines only the explicitly-endian forms; bare `string16` is not
391/// a valid keyword.
392fn string16_family(name: &str) -> Option<TypeKind> {
393    match name {
394        "lestring16" => Some(TypeKind::String16 {
395            endian: Endianness::Little,
396        }),
397        "bestring16" => Some(TypeKind::String16 {
398            endian: Endianness::Big,
399        }),
400        _ => None,
401    }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::Endianness;

    // ------------------------------------------------------------
    // parse_type_keyword: keyword recognition and remainder handling
    // ------------------------------------------------------------

    /// Both 8-bit spellings are recognized and the remainder is untouched.
    #[test]
    fn test_parse_type_keyword_byte_variants() {
        for keyword in ["byte", "ubyte"] {
            let line = format!("{keyword} rest");
            assert_eq!(parse_type_keyword(&line), Ok((" rest", keyword)));
        }
    }

    /// Every 16-bit spelling is recognized in full (no shorter prefix wins).
    #[test]
    fn test_parse_type_keyword_short_variants() {
        let keywords = [
            "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort",
        ];
        for input in keywords {
            let line = format!("{input} rest");
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, input, "Failed for input: {input}");
            assert_eq!(rest, " rest", "Wrong remaining for input: {input}");
        }
    }

    /// Every 32-bit spelling is recognized in full.
    #[test]
    fn test_parse_type_keyword_long_variants() {
        ["long", "ulong", "lelong", "ulelong", "belong", "ubelong"]
            .into_iter()
            .for_each(|input| {
                let line = format!("{input} rest");
                let (rest, keyword) = parse_type_keyword(&line).unwrap();
                assert_eq!(keyword, input, "Failed for: {input}");
                assert_eq!(rest, " rest");
            });
    }

    /// Every 64-bit spelling is recognized in full.
    #[test]
    fn test_parse_type_keyword_quad_variants() {
        ["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"]
            .into_iter()
            .for_each(|input| {
                let line = format!("{input} rest");
                let (rest, keyword) = parse_type_keyword(&line).unwrap();
                assert_eq!(keyword, input, "Failed for: {input}");
                assert_eq!(rest, " rest");
            });
    }

    #[test]
    fn test_parse_type_keyword_string() {
        let parsed = parse_type_keyword("string rest");
        assert_eq!(parsed, Ok((" rest", "string")));
    }

    #[test]
    fn test_parse_type_keyword_unknown() {
        let outcome = parse_type_keyword("unknown rest");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_parse_type_keyword_empty() {
        let outcome = parse_type_keyword("");
        assert!(outcome.is_err());
    }

    // ------------------------------------------------------------
    // type_keyword_to_kind: keyword -> TypeKind mapping
    // ------------------------------------------------------------

    #[test]
    fn test_type_keyword_to_kind_byte() {
        for (keyword, signed) in [("byte", true), ("ubyte", false)] {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::Byte { signed }))
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_short_endianness() {
        let cases = [
            ("short", Endianness::Native),
            ("leshort", Endianness::Little),
            ("beshort", Endianness::Big),
        ];
        for (keyword, endian) in cases {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::Short {
                    endian,
                    signed: true
                }))
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unsigned_variants() {
        let cases = [
            (
                "ushort",
                TypeKind::Short {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
            (
                "ulong",
                TypeKind::Long {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
            (
                "uquad",
                TypeKind::Quad {
                    endian: Endianness::Native,
                    signed: false,
                },
            ),
        ];
        for (keyword, expected) in cases {
            assert_eq!(type_keyword_to_kind(keyword), Ok(Some(expected)));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_signed_defaults() {
        // Unprefixed libmagic integer types are signed.
        let cases = [
            (
                "long",
                TypeKind::Long {
                    endian: Endianness::Native,
                    signed: true,
                },
            ),
            (
                "quad",
                TypeKind::Quad {
                    endian: Endianness::Native,
                    signed: true,
                },
            ),
        ];
        for (keyword, expected) in cases {
            assert_eq!(type_keyword_to_kind(keyword), Ok(Some(expected)));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_string() {
        let expected = TypeKind::String {
            max_length: None,
            flags: StringFlags::default(),
        };
        assert_eq!(type_keyword_to_kind("string"), Ok(Some(expected)));
    }

    #[test]
    fn test_parse_type_keyword_pstring() {
        let parsed = parse_type_keyword("pstring rest");
        assert_eq!(parsed, Ok((" rest", "pstring")));
    }

    #[test]
    fn test_type_keyword_to_kind_pstring() {
        let expected = TypeKind::PString {
            max_length: None,
            length_width: PStringLengthWidth::OneByte,
            length_includes_itself: false,
        };
        assert_eq!(type_keyword_to_kind("pstring"), Ok(Some(expected)));
    }

    #[test]
    fn test_type_keyword_to_kind_regex_and_search_return_none() {
        // Both keywords need their suffix (flags/count/range) parsed by the
        // grammar layer, so the keyword-only mapping deliberately yields
        // Ok(None) and callers must construct the kind there instead.
        for keyword in ["regex", "search"] {
            assert_eq!(type_keyword_to_kind(keyword), Ok(None));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unknown_returns_err() {
        // `parse_type_keyword` rejects unknown keywords before this function
        // is reached, but a caller can still pass an arbitrary string
        // directly, so that case must surface as a structured error rather
        // than a panic.
        let err = type_keyword_to_kind("nonexistent").expect_err("unknown keyword must return Err");
        assert_eq!(err.keyword, "nonexistent");
        // The Display output names the keyword for debuggability.
        let rendered = err.to_string();
        assert!(rendered.contains("nonexistent"));
    }

    #[test]
    fn test_pstring_keyword_defaults_to_one_byte_width() {
        // The bare keyword yields the OneByte default; suffix handling lives
        // in grammar/mod.rs, not in this module.
        let kind = type_keyword_to_kind("pstring")
            .expect("pstring is a known keyword")
            .expect("pstring maps to Some(TypeKind)");
        match kind {
            TypeKind::PString {
                max_length,
                length_width,
                ..
            } => {
                assert_eq!(
                    max_length, None,
                    "pstring default should have no max_length"
                );
                assert_eq!(
                    length_width,
                    PStringLengthWidth::OneByte,
                    "pstring default should be OneByte"
                );
            }
            other => panic!("Expected TypeKind::PString, got {other:?}"),
        }
    }

    #[test]
    fn test_pstring_keyword_does_not_consume_suffix() {
        // Only "pstring" is consumed; the `/H` suffix is left for the grammar.
        let (rest, keyword) = parse_type_keyword("pstring/H =value").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(
            rest, "/H =value",
            "Suffix should remain unconsumed by type keyword parser"
        );
    }

    #[test]
    fn test_pstring_keyword_boundary() {
        // Keyword is the entire input: nothing may remain.
        let (rest, keyword) = parse_type_keyword("pstring").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(rest, "");
    }

    #[test]
    fn test_pstring_before_operator() {
        // Keyword followed by whitespace and an operator.
        let (rest, keyword) = parse_type_keyword("pstring =hello").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(rest, " =hello");
    }

    #[test]
    fn test_parse_type_keyword_string16_variants() {
        for expected in ["lestring16", "bestring16"] {
            let line = format!("{expected} rest");
            let (rest, kw) = parse_type_keyword(&line).unwrap();
            assert_eq!(kw, expected);
            assert_eq!(rest, " rest");
        }
    }

    #[test]
    fn test_string16_keyword_to_kind() {
        let cases = [
            ("lestring16", Endianness::Little),
            ("bestring16", Endianness::Big),
        ];
        for (keyword, endian) in cases {
            assert_eq!(
                type_keyword_to_kind(keyword),
                Ok(Some(TypeKind::String16 { endian }))
            );
        }
    }

    #[test]
    fn test_roundtrip_all_keywords() {
        // Every keyword accepted by parse_type_keyword must be handled by
        // type_keyword_to_kind. The first list holds the keywords that map
        // to a concrete TypeKind; the suffix-dependent keywords are checked
        // separately below because they deliberately map to Ok(None).
        let convertible_keywords = [
            // 8-bit
            "byte",
            "ubyte",
            // 16-bit
            "short",
            "ushort",
            "leshort",
            "uleshort",
            "beshort",
            "ubeshort",
            // 32-bit
            "long",
            "ulong",
            "lelong",
            "ulelong",
            "belong",
            "ubelong",
            // 64-bit
            "quad",
            "uquad",
            "lequad",
            "ulequad",
            "bequad",
            "ubequad",
            // floating point
            "float",
            "befloat",
            "lefloat",
            "double",
            "bedouble",
            "ledouble",
            // 32-bit dates
            "date",
            "ldate",
            "bedate",
            "beldate",
            "ledate",
            "leldate",
            // 64-bit dates
            "qdate",
            "qldate",
            "beqdate",
            "beqldate",
            "leqdate",
            "leqldate",
            // strings
            "pstring",
            "string",
            "lestring16",
            "bestring16",
            // operand-less meta directives
            "default",
            "clear",
            "indirect",
            "offset",
        ];
        for keyword in convertible_keywords {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert!(
                matches!(type_keyword_to_kind(parsed), Ok(Some(_))),
                "{keyword} should map to Ok(Some(TypeKind))"
            );
        }
        // regex, search, name, and use are recognized by parse_type_keyword
        // but need grammar-layer suffix parsing (flags/count/range or an
        // identifier) before a TypeKind can be built. Check both halves of
        // that split.
        for keyword in ["regex", "search", "name", "use"] {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert_eq!(
                type_keyword_to_kind(parsed),
                Ok(None),
                "{keyword} should return Ok(None) from keyword-to-kind"
            );
        }
    }
}