Skip to main content

libmagic_rs/parser/
types.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Type keyword parsing for magic file types
5//!
6//! This module handles parsing and classification of magic file type keywords
7//! (byte, short, long, quad, string, etc.) into their corresponding [`TypeKind`]
8//! representations. It extracts the type keyword recognition from the grammar
9//! module to keep type-specific logic cohesive and manageable as new types are
10//! added.
11
12use nom::{IResult, Parser, branch::alt, bytes::complete::tag};
13
14use crate::parser::ast::{Endianness, MetaType, PStringLengthWidth, TypeKind};
15
16/// Error returned by [`type_keyword_to_kind`] when the supplied keyword is
17/// not a recognized magic type keyword.
18///
19/// This is a tight, structured error surfaced from a pure mapping function
20/// that has no access to line-number context. Callers that *do* have line
21/// context (e.g. the grammar layer wrapping a higher-level parse) can
22/// convert it into a richer [`crate::error::ParseError`] variant if needed.
23/// The struct is `#[non_exhaustive]` so future fields (e.g. suggested
24/// alternatives) can be added without a major version bump.
25///
26/// # Examples
27///
28/// ```
29/// use libmagic_rs::parser::types::{type_keyword_to_kind, UnknownTypeKeyword};
30///
31/// let err = type_keyword_to_kind("notarealtype").unwrap_err();
32/// assert_eq!(err.keyword, "notarealtype");
33/// ```
34#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
35#[non_exhaustive]
36#[error("unknown type keyword: {keyword}")]
37pub struct UnknownTypeKeyword {
38    /// The keyword string that was not recognized.
39    pub keyword: String,
40}
41
42/// Parse a type keyword from magic file input
43///
44/// Recognizes all supported type keywords and returns the matched keyword string.
45/// Type keywords are organized by bit width (64, 32, 16, 8 bits) with longest
46/// prefixes matched first within each group to avoid ambiguous partial matches.
47///
48/// # Supported Keywords
49///
50/// - 64-bit: `ubequad`, `ulequad`, `uquad`, `bequad`, `lequad`, `quad`
51/// - 32-bit: `ubelong`, `ulelong`, `ulong`, `belong`, `lelong`, `long`
52/// - 16-bit: `ubeshort`, `uleshort`, `ushort`, `beshort`, `leshort`, `short`
53/// - 8-bit: `ubyte`, `byte`
54/// - String: `pstring`, `string`
55///
56/// # Examples
57///
58/// ```
59/// use libmagic_rs::parser::types::parse_type_keyword;
60///
61/// let (rest, keyword) = parse_type_keyword("bequad rest").unwrap();
62/// assert_eq!(keyword, "bequad");
63/// assert_eq!(rest, " rest");
64/// ```
65///
66/// # Errors
67///
68/// Returns a nom parsing error if the input doesn't start with a known type keyword.
69pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
70    alt((
71        // 64-bit types (6 branches)
72        alt((
73            tag("ubequad"),
74            tag("ulequad"),
75            tag("uquad"),
76            tag("bequad"),
77            tag("lequad"),
78            tag("quad"),
79        )),
80        // 32-bit types (6 branches)
81        alt((
82            tag("ubelong"),
83            tag("ulelong"),
84            tag("ulong"),
85            tag("belong"),
86            tag("lelong"),
87            tag("long"),
88        )),
89        // 16-bit types (6 branches)
90        alt((
91            tag("ubeshort"),
92            tag("uleshort"),
93            tag("ushort"),
94            tag("beshort"),
95            tag("leshort"),
96            tag("short"),
97        )),
98        // 8-bit types (2 branches)
99        alt((tag("ubyte"), tag("byte"))),
100        // Float/double types (6 branches)
101        alt((
102            tag("bedouble"),
103            tag("ledouble"),
104            tag("double"),
105            tag("befloat"),
106            tag("lefloat"),
107            tag("float"),
108        )),
109        // Date types -- 32-bit (date) and 64-bit (qdate)
110        alt((
111            tag("beqldate"),
112            tag("leqldate"),
113            tag("beqdate"),
114            tag("leqdate"),
115            tag("qldate"),
116            tag("qdate"),
117            tag("beldate"),
118            tag("leldate"),
119            tag("bedate"),
120            tag("ldate"),
121            tag("ledate"),
122            tag("date"),
123        )),
124        // String types (and regex/search, which share the string-type family).
125        //
126        // `lestring16`/`bestring16` are listed before `string` because nom
127        // tries each tag in order and we need the longer keyword to win when
128        // both could plausibly match -- in practice the prefixes (`lestring`,
129        // `bestring`) don't collide with anything else, but ordering by
130        // length is the safer pattern as more keywords are added.
131        alt((
132            tag("lestring16"),
133            tag("bestring16"),
134            tag("pstring"),
135            tag("search"),
136            tag("regex"),
137            tag("string"),
138        )),
139        // Meta / control-flow directives. `indirect` is listed first so the
140        // longest match is tried before `default`, `clear`, `name`, `use`;
141        // none of these collide with other supported keywords.
142        //
143        // `offset` is recognized here so the parser can accept magic files
144        // that use it (e.g. `searchbug.magic`). It maps to
145        // `TypeKind::Meta(MetaType::Offset)` and is fully evaluated by the
146        // engine: the resolved offset is emitted as `Value::Uint(position)`
147        // and participates in printf-style format substitution.
148        alt((
149            tag("indirect"),
150            tag("default"),
151            tag("offset"),
152            tag("clear"),
153            tag("name"),
154            tag("use"),
155        )),
156    ))
157    .parse(input)
158}
159
160/// Convert a type keyword string to its corresponding [`TypeKind`]
161///
162/// Maps a previously parsed type keyword (from [`parse_type_keyword`]) to the
163/// appropriate `TypeKind` variant with correct endianness and signedness settings.
164///
165/// # Conventions
166///
167/// - Unprefixed types are signed (libmagic default): `byte`, `short`, `long`, `quad`
168/// - `u` prefix indicates unsigned: `ubyte`, `ushort`, `ulong`, `uquad`
169/// - `be` prefix indicates big-endian: `beshort`, `belong`, `bequad`
170/// - `le` prefix indicates little-endian: `leshort`, `lelong`, `lequad`
171/// - No endian prefix means native endianness
172///
173/// Returns `Ok(None)` for `regex` and `search`, which cannot be constructed
174/// from the keyword alone -- they require suffix parsing (flags/count
175/// for regex, mandatory `NonZeroUsize` range for search) that only
176/// happens in `parser::grammar::parse_type_and_operator`. Callers that
177/// need a complete `TypeKind::Regex` or `TypeKind::Search` must build
178/// it directly in the grammar layer, not via this function.
179///
180/// # Examples
181///
182/// ```
183/// use libmagic_rs::parser::types::type_keyword_to_kind;
184/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
185///
186/// assert_eq!(type_keyword_to_kind("byte"), Ok(Some(TypeKind::Byte { signed: true })));
187/// assert_eq!(type_keyword_to_kind("ubyte"), Ok(Some(TypeKind::Byte { signed: false })));
188/// assert_eq!(
189///     type_keyword_to_kind("beshort"),
190///     Ok(Some(TypeKind::Short { endian: Endianness::Big, signed: true }))
191/// );
192/// // regex/search require suffix parsing, so the keyword alone returns Ok(None).
193/// assert_eq!(type_keyword_to_kind("regex"), Ok(None));
194/// assert_eq!(type_keyword_to_kind("search"), Ok(None));
195/// // Unknown keywords return a structured error.
196/// assert!(type_keyword_to_kind("bogus").is_err());
197/// ```
198///
199/// # Returns
200///
201/// * `Ok(Some(TypeKind))` for fully-specified keywords (byte, short, long,
202///   quad, float, double, date, qdate, string, pstring and all their
203///   variants).
204/// * `Ok(None)` for suffix-required keywords (`regex`, `search`) which
205///   cannot be converted from the keyword alone -- the grammar layer
206///   builds their `TypeKind` directly after parsing the suffix.
207/// * `Err(UnknownTypeKeyword)` if `type_name` is not a recognized
208///   keyword. Under normal control flow this only happens when a caller
209///   bypasses [`parse_type_keyword`] (which is the only supported way
210///   to produce valid input for this function).
211///
212/// # Errors
213///
214/// Returns [`UnknownTypeKeyword`] when `type_name` is not one of the
215/// keywords recognized by [`parse_type_keyword`]. This replaces a prior
216/// `unreachable!` panic; library code must never panic on untrusted
217/// input, and the structured error lets callers translate the failure
218/// into their own error type (e.g. a nom parse error or a richer
219/// `ParseError::InvalidType`).
220pub fn type_keyword_to_kind(type_name: &str) -> Result<Option<TypeKind>, UnknownTypeKeyword> {
221    // `regex` and `search` cannot be constructed from the keyword alone.
222    // They require suffix parsing (flags/count for regex, mandatory
223    // `NonZeroUsize` range for search) which only happens in
224    // `parse_type_and_operator` in grammar/mod.rs. Returning `None`
225    // here makes the "keyword alone isn't enough" invariant
226    // type-enforced instead of relying on a placeholder that the
227    // grammar layer is expected to overwrite.
228    //
229    // `name` and `use` also return `Ok(None)` because their identifier
230    // suffix is parsed in the grammar layer, following the same
231    // "keyword alone isn't enough" pattern.
232    if matches!(type_name, "regex" | "search" | "name" | "use") {
233        return Ok(None);
234    }
235
236    // Meta / control-flow directives with no trailing operand are fully
237    // specified by the keyword alone. `offset` maps to
238    // `MetaType::Offset` which the engine evaluates by emitting the
239    // resolved file position as `Value::Uint` for format substitution.
240    match type_name {
241        "default" => return Ok(Some(TypeKind::Meta(MetaType::Default))),
242        "clear" => return Ok(Some(TypeKind::Meta(MetaType::Clear))),
243        "indirect" => return Ok(Some(TypeKind::Meta(MetaType::Indirect))),
244        "offset" => return Ok(Some(TypeKind::Meta(MetaType::Offset))),
245        _ => {}
246    }
247
248    if let Some(kind) = byte_family(type_name)
249        .or_else(|| short_family(type_name))
250        .or_else(|| long_family(type_name))
251        .or_else(|| quad_family(type_name))
252        .or_else(|| float_family(type_name))
253        .or_else(|| double_family(type_name))
254        .or_else(|| date_family(type_name))
255        .or_else(|| qdate_family(type_name))
256        .or_else(|| string_family(type_name))
257        .or_else(|| string16_family(type_name))
258    {
259        return Ok(Some(kind));
260    }
261
262    Err(UnknownTypeKeyword {
263        keyword: type_name.to_string(),
264    })
265}
266
267/// Map a byte-family keyword (`byte`, `ubyte`) to its `TypeKind`.
268fn byte_family(name: &str) -> Option<TypeKind> {
269    match name {
270        "byte" => Some(TypeKind::Byte { signed: true }),
271        "ubyte" => Some(TypeKind::Byte { signed: false }),
272        _ => None,
273    }
274}
275
276/// Map a short-family keyword (`short`/`ushort`/`beshort`/...) to its `TypeKind`.
277fn short_family(name: &str) -> Option<TypeKind> {
278    let (endian, signed) = match name {
279        "short" => (Endianness::Native, true),
280        "ushort" => (Endianness::Native, false),
281        "leshort" => (Endianness::Little, true),
282        "uleshort" => (Endianness::Little, false),
283        "beshort" => (Endianness::Big, true),
284        "ubeshort" => (Endianness::Big, false),
285        _ => return None,
286    };
287    Some(TypeKind::Short { endian, signed })
288}
289
290/// Map a long-family keyword (`long`/`ulong`/`belong`/...) to its `TypeKind`.
291fn long_family(name: &str) -> Option<TypeKind> {
292    let (endian, signed) = match name {
293        "long" => (Endianness::Native, true),
294        "ulong" => (Endianness::Native, false),
295        "lelong" => (Endianness::Little, true),
296        "ulelong" => (Endianness::Little, false),
297        "belong" => (Endianness::Big, true),
298        "ubelong" => (Endianness::Big, false),
299        _ => return None,
300    };
301    Some(TypeKind::Long { endian, signed })
302}
303
304/// Map a quad-family keyword (`quad`/`uquad`/`bequad`/...) to its `TypeKind`.
305fn quad_family(name: &str) -> Option<TypeKind> {
306    let (endian, signed) = match name {
307        "quad" => (Endianness::Native, true),
308        "uquad" => (Endianness::Native, false),
309        "lequad" => (Endianness::Little, true),
310        "ulequad" => (Endianness::Little, false),
311        "bequad" => (Endianness::Big, true),
312        "ubequad" => (Endianness::Big, false),
313        _ => return None,
314    };
315    Some(TypeKind::Quad { endian, signed })
316}
317
318/// Map a float-family keyword (`float`/`befloat`/`lefloat`) to its `TypeKind`.
319fn float_family(name: &str) -> Option<TypeKind> {
320    let endian = match name {
321        "float" => Endianness::Native,
322        "befloat" => Endianness::Big,
323        "lefloat" => Endianness::Little,
324        _ => return None,
325    };
326    Some(TypeKind::Float { endian })
327}
328
329/// Map a double-family keyword (`double`/`bedouble`/`ledouble`) to its `TypeKind`.
330fn double_family(name: &str) -> Option<TypeKind> {
331    let endian = match name {
332        "double" => Endianness::Native,
333        "bedouble" => Endianness::Big,
334        "ledouble" => Endianness::Little,
335        _ => return None,
336    };
337    Some(TypeKind::Double { endian })
338}
339
340/// Map a 32-bit date keyword (`date`/`ldate`/`bedate`/...) to its `TypeKind`.
341fn date_family(name: &str) -> Option<TypeKind> {
342    let (endian, utc) = match name {
343        "date" => (Endianness::Native, true),
344        "ldate" => (Endianness::Native, false),
345        "bedate" => (Endianness::Big, true),
346        "beldate" => (Endianness::Big, false),
347        "ledate" => (Endianness::Little, true),
348        "leldate" => (Endianness::Little, false),
349        _ => return None,
350    };
351    Some(TypeKind::Date { endian, utc })
352}
353
354/// Map a 64-bit date keyword (`qdate`/`qldate`/`beqdate`/...) to its `TypeKind`.
355fn qdate_family(name: &str) -> Option<TypeKind> {
356    let (endian, utc) = match name {
357        "qdate" => (Endianness::Native, true),
358        "qldate" => (Endianness::Native, false),
359        "beqdate" => (Endianness::Big, true),
360        "beqldate" => (Endianness::Big, false),
361        "leqdate" => (Endianness::Little, true),
362        "leqldate" => (Endianness::Little, false),
363        _ => return None,
364    };
365    Some(TypeKind::QDate { endian, utc })
366}
367
368/// Map a string-family keyword (`string`, `pstring`) to its `TypeKind`.
369///
370/// `pstring` defaults to a 1-byte length prefix; the grammar layer
371/// overwrites `length_width` / `length_includes_itself` from any
372/// trailing `/B`/`/H`/`/h`/`/L`/`/l`/`/J` suffix.
373fn string_family(name: &str) -> Option<TypeKind> {
374    match name {
375        "string" => Some(TypeKind::String { max_length: None }),
376        "pstring" => Some(TypeKind::PString {
377            max_length: None,
378            length_width: PStringLengthWidth::OneByte,
379            length_includes_itself: false,
380        }),
381        _ => None,
382    }
383}
384
385/// Map a UCS-2 string keyword (`lestring16`/`bestring16`) to its `TypeKind`.
386///
387/// magic(5) defines only the explicitly-endian forms; bare `string16` is not
388/// a valid keyword.
389fn string16_family(name: &str) -> Option<TypeKind> {
390    match name {
391        "lestring16" => Some(TypeKind::String16 {
392            endian: Endianness::Little,
393        }),
394        "bestring16" => Some(TypeKind::String16 {
395            endian: Endianness::Big,
396        }),
397        _ => None,
398    }
399}
400
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::Endianness;

    /// Shorthand for the `Ok(Some(..))` shape returned for keywords that are
    /// fully specified by the keyword alone.
    fn fully_specified(kind: TypeKind) -> Result<Option<TypeKind>, UnknownTypeKeyword> {
        Ok(Some(kind))
    }

    // ------------------------------------------------------------
    // parse_type_keyword
    // ------------------------------------------------------------

    #[test]
    fn test_parse_type_keyword_byte_variants() {
        let signed = parse_type_keyword("byte rest");
        assert_eq!(signed, Ok((" rest", "byte")));

        let unsigned = parse_type_keyword("ubyte rest");
        assert_eq!(unsigned, Ok((" rest", "ubyte")));
    }

    #[test]
    fn test_parse_type_keyword_short_variants() {
        // Every short-family keyword must be returned verbatim.
        for input in [
            "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort",
        ] {
            let expected = input;
            let line = format!("{input} rest");
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, expected, "Failed for input: {input}");
            assert_eq!(rest, " rest", "Wrong remaining for input: {input}");
        }
    }

    #[test]
    fn test_parse_type_keyword_long_variants() {
        for input in ["long", "ulong", "lelong", "ulelong", "belong", "ubelong"] {
            let line = format!("{input} rest");
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, input, "Failed for: {input}");
            assert_eq!(rest, " rest");
        }
    }

    #[test]
    fn test_parse_type_keyword_quad_variants() {
        for input in ["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"] {
            let line = format!("{input} rest");
            let (rest, keyword) = parse_type_keyword(&line).unwrap();
            assert_eq!(keyword, input, "Failed for: {input}");
            assert_eq!(rest, " rest");
        }
    }

    #[test]
    fn test_parse_type_keyword_string() {
        let parsed = parse_type_keyword("string rest");
        assert_eq!(parsed, Ok((" rest", "string")));
    }

    #[test]
    fn test_parse_type_keyword_unknown() {
        let parsed = parse_type_keyword("unknown rest");
        assert!(parsed.is_err());
    }

    #[test]
    fn test_parse_type_keyword_empty() {
        let parsed = parse_type_keyword("");
        assert!(parsed.is_err());
    }

    // ------------------------------------------------------------
    // type_keyword_to_kind
    // ------------------------------------------------------------

    #[test]
    fn test_type_keyword_to_kind_byte() {
        for (keyword, signed) in [("byte", true), ("ubyte", false)] {
            assert_eq!(
                type_keyword_to_kind(keyword),
                fully_specified(TypeKind::Byte { signed })
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_short_endianness() {
        let expectations = [
            ("short", Endianness::Native),
            ("leshort", Endianness::Little),
            ("beshort", Endianness::Big),
        ];
        for (keyword, endian) in expectations {
            assert_eq!(
                type_keyword_to_kind(keyword),
                fully_specified(TypeKind::Short {
                    endian,
                    signed: true
                })
            );
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unsigned_variants() {
        let native_unsigned_short = TypeKind::Short {
            endian: Endianness::Native,
            signed: false,
        };
        assert_eq!(
            type_keyword_to_kind("ushort"),
            fully_specified(native_unsigned_short)
        );

        let native_unsigned_long = TypeKind::Long {
            endian: Endianness::Native,
            signed: false,
        };
        assert_eq!(
            type_keyword_to_kind("ulong"),
            fully_specified(native_unsigned_long)
        );

        let native_unsigned_quad = TypeKind::Quad {
            endian: Endianness::Native,
            signed: false,
        };
        assert_eq!(
            type_keyword_to_kind("uquad"),
            fully_specified(native_unsigned_quad)
        );
    }

    #[test]
    fn test_type_keyword_to_kind_signed_defaults() {
        // libmagic types are signed unless spelled with a `u` prefix.
        let native_signed_long = TypeKind::Long {
            endian: Endianness::Native,
            signed: true,
        };
        assert_eq!(
            type_keyword_to_kind("long"),
            fully_specified(native_signed_long)
        );

        let native_signed_quad = TypeKind::Quad {
            endian: Endianness::Native,
            signed: true,
        };
        assert_eq!(
            type_keyword_to_kind("quad"),
            fully_specified(native_signed_quad)
        );
    }

    #[test]
    fn test_type_keyword_to_kind_string() {
        let unbounded = TypeKind::String { max_length: None };
        assert_eq!(type_keyword_to_kind("string"), fully_specified(unbounded));
    }

    #[test]
    fn test_parse_type_keyword_pstring() {
        let parsed = parse_type_keyword("pstring rest");
        assert_eq!(parsed, Ok((" rest", "pstring")));
    }

    #[test]
    fn test_type_keyword_to_kind_pstring() {
        let default_pstring = TypeKind::PString {
            max_length: None,
            length_width: PStringLengthWidth::OneByte,
            length_includes_itself: false,
        };
        assert_eq!(
            type_keyword_to_kind("pstring"),
            fully_specified(default_pstring)
        );
    }

    #[test]
    fn test_type_keyword_to_kind_regex_and_search_return_none() {
        // regex and search need their suffix (flags/count/range), which is
        // only parsed in grammar/mod.rs. The keyword-to-kind mapping
        // therefore answers Ok(None), forcing callers through the grammar
        // layer's direct construction.
        for keyword in ["regex", "search"] {
            assert_eq!(type_keyword_to_kind(keyword), Ok(None));
        }
    }

    #[test]
    fn test_type_keyword_to_kind_unknown_returns_err() {
        // Unknown keywords yield a structured error rather than a panic.
        // `parse_type_keyword` rejects such input before this function runs,
        // but a caller can still pass an arbitrary string directly, so the
        // error must be representable.
        let outcome = type_keyword_to_kind("nonexistent");
        let err = outcome.expect_err("unknown keyword must return Err");
        assert_eq!(err.keyword, "nonexistent");
        // The Display output names the keyword for debuggability.
        let rendered = err.to_string();
        assert!(rendered.contains("nonexistent"));
    }

    #[test]
    fn test_pstring_keyword_defaults_to_one_byte_width() {
        // The bare pstring keyword must produce a OneByte length_width
        // (suffix parsing belongs to grammar/mod.rs, not types.rs).
        let kind = type_keyword_to_kind("pstring")
            .expect("pstring is a known keyword")
            .expect("pstring maps to Some(TypeKind)");
        if let TypeKind::PString {
            max_length,
            length_width,
            ..
        } = kind
        {
            assert_eq!(
                max_length, None,
                "pstring default should have no max_length"
            );
            assert_eq!(
                length_width,
                PStringLengthWidth::OneByte,
                "pstring default should be OneByte"
            );
        } else {
            panic!("Expected TypeKind::PString, got {kind:?}");
        }
    }

    #[test]
    fn test_pstring_keyword_does_not_consume_suffix() {
        // Only "pstring" is consumed; the suffix is left for the grammar.
        let (rest, keyword) = parse_type_keyword("pstring/H =value").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(
            rest, "/H =value",
            "Suffix should remain unconsumed by type keyword parser"
        );
    }

    #[test]
    fn test_pstring_keyword_boundary() {
        // The keyword is the entire input, so nothing may remain.
        let (rest, keyword) = parse_type_keyword("pstring").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(rest, "");
    }

    #[test]
    fn test_pstring_before_operator() {
        // Keyword, then whitespace, then an operator.
        let (rest, keyword) = parse_type_keyword("pstring =hello").unwrap();
        assert_eq!(keyword, "pstring");
        assert_eq!(rest, " =hello");
    }

    #[test]
    fn test_parse_type_keyword_string16_variants() {
        for (line, expected) in [
            ("lestring16 rest", "lestring16"),
            ("bestring16 rest", "bestring16"),
        ] {
            let (rest, kw) = parse_type_keyword(line).unwrap();
            assert_eq!(kw, expected);
            assert_eq!(rest, " rest");
        }
    }

    #[test]
    fn test_string16_keyword_to_kind() {
        let expectations = [
            ("lestring16", Endianness::Little),
            ("bestring16", Endianness::Big),
        ];
        for (keyword, endian) in expectations {
            assert_eq!(
                type_keyword_to_kind(keyword),
                fully_specified(TypeKind::String16 { endian })
            );
        }
    }

    #[test]
    fn test_roundtrip_all_keywords() {
        // Every keyword accepted by parse_type_keyword that is fully
        // specified on its own must convert to a TypeKind. The keywords
        // that deliberately map to Ok(None) are checked separately below.
        let convertible_families: [&[&str]; 10] = [
            &["byte", "ubyte"],
            &[
                "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort",
            ],
            &["long", "ulong", "lelong", "ulelong", "belong", "ubelong"],
            &["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"],
            &["float", "befloat", "lefloat"],
            &["double", "bedouble", "ledouble"],
            &["date", "ldate", "bedate", "beldate", "ledate", "leldate"],
            &[
                "qdate", "qldate", "beqdate", "beqldate", "leqdate", "leqldate",
            ],
            &["pstring", "string", "lestring16", "bestring16"],
            &["default", "clear", "indirect", "offset"],
        ];
        for keyword in convertible_families.iter().flat_map(|family| family.iter().copied()) {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert!(
                type_keyword_to_kind(parsed).is_ok_and(|o| o.is_some()),
                "{keyword} should map to Ok(Some(TypeKind))"
            );
        }

        // regex, search, name, and use are accepted by parse_type_keyword
        // but need grammar-layer parsing (flags/count/range or an
        // identifier) before a TypeKind can exist. Check both halves of
        // that split invariant.
        for keyword in ["regex", "search", "name", "use"] {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert_eq!(
                type_keyword_to_kind(parsed),
                Ok(None),
                "{keyword} should return Ok(None) from keyword-to-kind"
            );
        }
    }
}
763}