Skip to main content

libmagic_rs/parser/
types.rs

// Copyright (c) 2025-2026 the libmagic-rs contributors
// SPDX-License-Identifier: Apache-2.0

//! Type keyword parsing for magic file types
//!
//! This module handles parsing and classification of magic file type keywords
//! (byte, short, long, quad, string, etc.) into their corresponding [`TypeKind`]
//! representations. It extracts the type keyword recognition from the grammar
//! module to keep type-specific logic cohesive and manageable as new types are
//! added.
11
12use nom::{IResult, Parser, branch::alt, bytes::complete::tag};
13
14use crate::parser::ast::{Endianness, TypeKind};
15
16/// Parse a type keyword from magic file input
17///
18/// Recognizes all supported type keywords and returns the matched keyword string.
19/// Type keywords are organized by bit width (64, 32, 16, 8 bits) with longest
20/// prefixes matched first within each group to avoid ambiguous partial matches.
21///
22/// # Supported Keywords
23///
24/// - 64-bit: `ubequad`, `ulequad`, `uquad`, `bequad`, `lequad`, `quad`
25/// - 32-bit: `ubelong`, `ulelong`, `ulong`, `belong`, `lelong`, `long`
26/// - 16-bit: `ubeshort`, `uleshort`, `ushort`, `beshort`, `leshort`, `short`
27/// - 8-bit: `ubyte`, `byte`
28/// - String: `string`
29///
30/// # Examples
31///
32/// ```
33/// use libmagic_rs::parser::types::parse_type_keyword;
34///
35/// let (rest, keyword) = parse_type_keyword("bequad rest").unwrap();
36/// assert_eq!(keyword, "bequad");
37/// assert_eq!(rest, " rest");
38/// ```
39///
40/// # Errors
41///
42/// Returns a nom parsing error if the input doesn't start with a known type keyword.
43pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
44    alt((
45        // 64-bit types (6 branches)
46        alt((
47            tag("ubequad"),
48            tag("ulequad"),
49            tag("uquad"),
50            tag("bequad"),
51            tag("lequad"),
52            tag("quad"),
53        )),
54        // 32-bit types (6 branches)
55        alt((
56            tag("ubelong"),
57            tag("ulelong"),
58            tag("ulong"),
59            tag("belong"),
60            tag("lelong"),
61            tag("long"),
62        )),
63        // 16-bit types (6 branches)
64        alt((
65            tag("ubeshort"),
66            tag("uleshort"),
67            tag("ushort"),
68            tag("beshort"),
69            tag("leshort"),
70            tag("short"),
71        )),
72        // 8-bit types (2 branches)
73        alt((tag("ubyte"), tag("byte"))),
74        // Float/double types (6 branches)
75        alt((
76            tag("bedouble"),
77            tag("ledouble"),
78            tag("double"),
79            tag("befloat"),
80            tag("lefloat"),
81            tag("float"),
82        )),
83        // String types (1 branch, will grow with pstring/search/regex)
84        tag("string"),
85    ))
86    .parse(input)
87}
88
89/// Convert a type keyword string to its corresponding [`TypeKind`]
90///
91/// Maps a previously parsed type keyword (from [`parse_type_keyword`]) to the
92/// appropriate `TypeKind` variant with correct endianness and signedness settings.
93///
94/// # Conventions
95///
96/// - Unprefixed types are signed (libmagic default): `byte`, `short`, `long`, `quad`
97/// - `u` prefix indicates unsigned: `ubyte`, `ushort`, `ulong`, `uquad`
98/// - `be` prefix indicates big-endian: `beshort`, `belong`, `bequad`
99/// - `le` prefix indicates little-endian: `leshort`, `lelong`, `lequad`
100/// - No endian prefix means native endianness
101///
102/// # Examples
103///
104/// ```
105/// use libmagic_rs::parser::types::type_keyword_to_kind;
106/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
107///
108/// assert_eq!(type_keyword_to_kind("byte"), TypeKind::Byte { signed: true });
109/// assert_eq!(type_keyword_to_kind("ubyte"), TypeKind::Byte { signed: false });
110/// assert_eq!(
111///     type_keyword_to_kind("beshort"),
112///     TypeKind::Short { endian: Endianness::Big, signed: true }
113/// );
114/// ```
115///
116/// # Panics
117///
118/// Panics if `type_name` is not a recognized type keyword. This function should
119/// only be called with values returned by [`parse_type_keyword`].
120#[must_use]
121pub fn type_keyword_to_kind(type_name: &str) -> TypeKind {
122    match type_name {
123        // BYTE types (8-bit)
124        "byte" => TypeKind::Byte { signed: true },
125        "ubyte" => TypeKind::Byte { signed: false },
126
127        // SHORT types (16-bit)
128        "short" => TypeKind::Short {
129            endian: Endianness::Native,
130            signed: true,
131        },
132        "ushort" => TypeKind::Short {
133            endian: Endianness::Native,
134            signed: false,
135        },
136        "leshort" => TypeKind::Short {
137            endian: Endianness::Little,
138            signed: true,
139        },
140        "uleshort" => TypeKind::Short {
141            endian: Endianness::Little,
142            signed: false,
143        },
144        "beshort" => TypeKind::Short {
145            endian: Endianness::Big,
146            signed: true,
147        },
148        "ubeshort" => TypeKind::Short {
149            endian: Endianness::Big,
150            signed: false,
151        },
152
153        // LONG types (32-bit)
154        "long" => TypeKind::Long {
155            endian: Endianness::Native,
156            signed: true,
157        },
158        "ulong" => TypeKind::Long {
159            endian: Endianness::Native,
160            signed: false,
161        },
162        "lelong" => TypeKind::Long {
163            endian: Endianness::Little,
164            signed: true,
165        },
166        "ulelong" => TypeKind::Long {
167            endian: Endianness::Little,
168            signed: false,
169        },
170        "belong" => TypeKind::Long {
171            endian: Endianness::Big,
172            signed: true,
173        },
174        "ubelong" => TypeKind::Long {
175            endian: Endianness::Big,
176            signed: false,
177        },
178
179        // QUAD types (64-bit)
180        "quad" => TypeKind::Quad {
181            endian: Endianness::Native,
182            signed: true,
183        },
184        "uquad" => TypeKind::Quad {
185            endian: Endianness::Native,
186            signed: false,
187        },
188        "lequad" => TypeKind::Quad {
189            endian: Endianness::Little,
190            signed: true,
191        },
192        "ulequad" => TypeKind::Quad {
193            endian: Endianness::Little,
194            signed: false,
195        },
196        "bequad" => TypeKind::Quad {
197            endian: Endianness::Big,
198            signed: true,
199        },
200        "ubequad" => TypeKind::Quad {
201            endian: Endianness::Big,
202            signed: false,
203        },
204
205        // FLOAT types (32-bit)
206        "float" => TypeKind::Float {
207            endian: Endianness::Native,
208        },
209        "befloat" => TypeKind::Float {
210            endian: Endianness::Big,
211        },
212        "lefloat" => TypeKind::Float {
213            endian: Endianness::Little,
214        },
215
216        // DOUBLE types (64-bit)
217        "double" => TypeKind::Double {
218            endian: Endianness::Native,
219        },
220        "bedouble" => TypeKind::Double {
221            endian: Endianness::Big,
222        },
223        "ledouble" => TypeKind::Double {
224            endian: Endianness::Little,
225        },
226
227        // STRING type
228        "string" => TypeKind::String { max_length: None },
229
230        _ => unreachable!("type_keyword_to_kind called with unknown type: {type_name}"),
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::Endianness;

    // ============================================================
    // Shared helpers
    // ============================================================

    /// Every keyword accepted by `parse_type_keyword`.
    const ALL_KEYWORDS: [&str; 27] = [
        "byte", "ubyte", "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort",
        "long", "ulong", "lelong", "ulelong", "belong", "ubelong", "quad", "uquad", "lequad",
        "ulequad", "bequad", "ubequad", "float", "befloat", "lefloat", "double", "bedouble",
        "ledouble", "string",
    ];

    /// Parses `"<keyword> rest"` and checks that exactly `keyword` is consumed.
    fn assert_keyword_parses(keyword: &str) {
        let input = format!("{keyword} rest");
        let (rest, parsed) = parse_type_keyword(&input)
            .unwrap_or_else(|err| panic!("Failed for input: {keyword} ({err:?})"));
        assert_eq!(parsed, keyword, "Wrong keyword for input: {keyword}");
        assert_eq!(rest, " rest", "Wrong remaining for input: {keyword}");
    }

    /// Checks each `(keyword, expected)` pair against `type_keyword_to_kind`.
    fn assert_kinds(cases: &[(&str, TypeKind)]) {
        for (keyword, expected) in cases {
            assert_eq!(
                &type_keyword_to_kind(keyword),
                expected,
                "Wrong kind for: {keyword}"
            );
        }
    }

    fn short(endian: Endianness, signed: bool) -> TypeKind {
        TypeKind::Short { endian, signed }
    }

    fn long(endian: Endianness, signed: bool) -> TypeKind {
        TypeKind::Long { endian, signed }
    }

    fn quad(endian: Endianness, signed: bool) -> TypeKind {
        TypeKind::Quad { endian, signed }
    }

    // ============================================================
    // parse_type_keyword tests
    // ============================================================

    #[test]
    fn test_parse_type_keyword_byte_variants() {
        assert_eq!(parse_type_keyword("byte rest"), Ok((" rest", "byte")));
        assert_eq!(parse_type_keyword("ubyte rest"), Ok((" rest", "ubyte")));
    }

    #[test]
    fn test_parse_type_keyword_short_variants() {
        for keyword in ["short", "ushort", "leshort", "uleshort", "beshort", "ubeshort"] {
            assert_keyword_parses(keyword);
        }
    }

    #[test]
    fn test_parse_type_keyword_long_variants() {
        for keyword in ["long", "ulong", "lelong", "ulelong", "belong", "ubelong"] {
            assert_keyword_parses(keyword);
        }
    }

    #[test]
    fn test_parse_type_keyword_quad_variants() {
        for keyword in ["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"] {
            assert_keyword_parses(keyword);
        }
    }

    #[test]
    fn test_parse_type_keyword_float_and_double_variants() {
        for keyword in ["float", "befloat", "lefloat", "double", "bedouble", "ledouble"] {
            assert_keyword_parses(keyword);
        }
    }

    #[test]
    fn test_parse_type_keyword_string() {
        assert_eq!(parse_type_keyword("string rest"), Ok((" rest", "string")));
    }

    #[test]
    fn test_parse_type_keyword_unknown() {
        assert!(parse_type_keyword("unknown rest").is_err());
    }

    #[test]
    fn test_parse_type_keyword_empty() {
        assert!(parse_type_keyword("").is_err());
    }

    // ============================================================
    // type_keyword_to_kind tests
    // ============================================================

    #[test]
    fn test_type_keyword_to_kind_byte() {
        assert_kinds(&[
            ("byte", TypeKind::Byte { signed: true }),
            ("ubyte", TypeKind::Byte { signed: false }),
        ]);
    }

    #[test]
    fn test_type_keyword_to_kind_short_endianness() {
        assert_kinds(&[
            ("short", short(Endianness::Native, true)),
            ("leshort", short(Endianness::Little, true)),
            ("beshort", short(Endianness::Big, true)),
        ]);
    }

    #[test]
    fn test_type_keyword_to_kind_unsigned_variants() {
        assert_kinds(&[
            ("ushort", short(Endianness::Native, false)),
            ("uleshort", short(Endianness::Little, false)),
            ("ubeshort", short(Endianness::Big, false)),
            ("ulong", long(Endianness::Native, false)),
            ("ulelong", long(Endianness::Little, false)),
            ("ubelong", long(Endianness::Big, false)),
            ("uquad", quad(Endianness::Native, false)),
            ("ulequad", quad(Endianness::Little, false)),
            ("ubequad", quad(Endianness::Big, false)),
        ]);
    }

    #[test]
    fn test_type_keyword_to_kind_signed_defaults() {
        // libmagic types are signed by default
        assert_kinds(&[
            ("long", long(Endianness::Native, true)),
            ("lelong", long(Endianness::Little, true)),
            ("belong", long(Endianness::Big, true)),
            ("quad", quad(Endianness::Native, true)),
            ("lequad", quad(Endianness::Little, true)),
            ("bequad", quad(Endianness::Big, true)),
        ]);
    }

    #[test]
    fn test_type_keyword_to_kind_float_and_double() {
        assert_kinds(&[
            (
                "float",
                TypeKind::Float {
                    endian: Endianness::Native,
                },
            ),
            (
                "lefloat",
                TypeKind::Float {
                    endian: Endianness::Little,
                },
            ),
            (
                "befloat",
                TypeKind::Float {
                    endian: Endianness::Big,
                },
            ),
            (
                "double",
                TypeKind::Double {
                    endian: Endianness::Native,
                },
            ),
            (
                "ledouble",
                TypeKind::Double {
                    endian: Endianness::Little,
                },
            ),
            (
                "bedouble",
                TypeKind::Double {
                    endian: Endianness::Big,
                },
            ),
        ]);
    }

    #[test]
    fn test_type_keyword_to_kind_string() {
        assert_eq!(
            type_keyword_to_kind("string"),
            TypeKind::String { max_length: None }
        );
    }

    #[test]
    #[should_panic(expected = "type_keyword_to_kind called with unknown type")]
    fn test_type_keyword_to_kind_unknown_panics() {
        let _ = type_keyword_to_kind("unknown");
    }

    #[test]
    fn test_roundtrip_all_keywords() {
        // Verify that every keyword parsed by parse_type_keyword can be
        // converted to a TypeKind by type_keyword_to_kind
        for keyword in ALL_KEYWORDS {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert_eq!(parsed, keyword, "Keyword {keyword} should be returned verbatim");
            // Should not panic
            let _ = type_keyword_to_kind(parsed);
        }
    }
}