Skip to main content

libmagic_rs/parser/
types.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Type keyword parsing for magic file types
5//!
6//! This module handles parsing and classification of magic file type keywords
7//! (byte, short, long, quad, string, etc.) into their corresponding [`TypeKind`]
8//! representations. It extracts the type keyword recognition from the grammar
9//! module to keep type-specific logic cohesive and manageable as new types are
10//! added.
11
12use nom::{IResult, Parser, branch::alt, bytes::complete::tag};
13
14use crate::parser::ast::{Endianness, TypeKind};
15
16/// Parse a type keyword from magic file input
17///
18/// Recognizes all supported type keywords and returns the matched keyword string.
19/// Type keywords are organized by bit width (64, 32, 16, 8 bits) with longest
20/// prefixes matched first within each group to avoid ambiguous partial matches.
21///
22/// # Supported Keywords
23///
24/// - 64-bit: `ubequad`, `ulequad`, `uquad`, `bequad`, `lequad`, `quad`
25/// - 32-bit: `ubelong`, `ulelong`, `ulong`, `belong`, `lelong`, `long`
26/// - 16-bit: `ubeshort`, `uleshort`, `ushort`, `beshort`, `leshort`, `short`
27/// - 8-bit: `ubyte`, `byte`
28/// - String: `string`
29///
30/// # Examples
31///
32/// ```
33/// use libmagic_rs::parser::types::parse_type_keyword;
34///
35/// let (rest, keyword) = parse_type_keyword("bequad rest").unwrap();
36/// assert_eq!(keyword, "bequad");
37/// assert_eq!(rest, " rest");
38/// ```
39///
40/// # Errors
41///
42/// Returns a nom parsing error if the input doesn't start with a known type keyword.
43pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
44    alt((
45        // 64-bit types (6 branches)
46        alt((
47            tag("ubequad"),
48            tag("ulequad"),
49            tag("uquad"),
50            tag("bequad"),
51            tag("lequad"),
52            tag("quad"),
53        )),
54        // 32-bit types (6 branches)
55        alt((
56            tag("ubelong"),
57            tag("ulelong"),
58            tag("ulong"),
59            tag("belong"),
60            tag("lelong"),
61            tag("long"),
62        )),
63        // 16-bit types (6 branches)
64        alt((
65            tag("ubeshort"),
66            tag("uleshort"),
67            tag("ushort"),
68            tag("beshort"),
69            tag("leshort"),
70            tag("short"),
71        )),
72        // 8-bit types (2 branches)
73        alt((tag("ubyte"), tag("byte"))),
74        // String types (1 branch, will grow with pstring/search/regex)
75        tag("string"),
76    ))
77    .parse(input)
78}
79
80/// Convert a type keyword string to its corresponding [`TypeKind`]
81///
82/// Maps a previously parsed type keyword (from [`parse_type_keyword`]) to the
83/// appropriate `TypeKind` variant with correct endianness and signedness settings.
84///
85/// # Conventions
86///
87/// - Unprefixed types are signed (libmagic default): `byte`, `short`, `long`, `quad`
88/// - `u` prefix indicates unsigned: `ubyte`, `ushort`, `ulong`, `uquad`
89/// - `be` prefix indicates big-endian: `beshort`, `belong`, `bequad`
90/// - `le` prefix indicates little-endian: `leshort`, `lelong`, `lequad`
91/// - No endian prefix means native endianness
92///
93/// # Examples
94///
95/// ```
96/// use libmagic_rs::parser::types::type_keyword_to_kind;
97/// use libmagic_rs::parser::ast::{TypeKind, Endianness};
98///
99/// assert_eq!(type_keyword_to_kind("byte"), TypeKind::Byte { signed: true });
100/// assert_eq!(type_keyword_to_kind("ubyte"), TypeKind::Byte { signed: false });
101/// assert_eq!(
102///     type_keyword_to_kind("beshort"),
103///     TypeKind::Short { endian: Endianness::Big, signed: true }
104/// );
105/// ```
106///
107/// # Panics
108///
109/// Panics if `type_name` is not a recognized type keyword. This function should
110/// only be called with values returned by [`parse_type_keyword`].
111#[must_use]
112pub fn type_keyword_to_kind(type_name: &str) -> TypeKind {
113    match type_name {
114        // BYTE types (8-bit)
115        "byte" => TypeKind::Byte { signed: true },
116        "ubyte" => TypeKind::Byte { signed: false },
117
118        // SHORT types (16-bit)
119        "short" => TypeKind::Short {
120            endian: Endianness::Native,
121            signed: true,
122        },
123        "ushort" => TypeKind::Short {
124            endian: Endianness::Native,
125            signed: false,
126        },
127        "leshort" => TypeKind::Short {
128            endian: Endianness::Little,
129            signed: true,
130        },
131        "uleshort" => TypeKind::Short {
132            endian: Endianness::Little,
133            signed: false,
134        },
135        "beshort" => TypeKind::Short {
136            endian: Endianness::Big,
137            signed: true,
138        },
139        "ubeshort" => TypeKind::Short {
140            endian: Endianness::Big,
141            signed: false,
142        },
143
144        // LONG types (32-bit)
145        "long" => TypeKind::Long {
146            endian: Endianness::Native,
147            signed: true,
148        },
149        "ulong" => TypeKind::Long {
150            endian: Endianness::Native,
151            signed: false,
152        },
153        "lelong" => TypeKind::Long {
154            endian: Endianness::Little,
155            signed: true,
156        },
157        "ulelong" => TypeKind::Long {
158            endian: Endianness::Little,
159            signed: false,
160        },
161        "belong" => TypeKind::Long {
162            endian: Endianness::Big,
163            signed: true,
164        },
165        "ubelong" => TypeKind::Long {
166            endian: Endianness::Big,
167            signed: false,
168        },
169
170        // QUAD types (64-bit)
171        "quad" => TypeKind::Quad {
172            endian: Endianness::Native,
173            signed: true,
174        },
175        "uquad" => TypeKind::Quad {
176            endian: Endianness::Native,
177            signed: false,
178        },
179        "lequad" => TypeKind::Quad {
180            endian: Endianness::Little,
181            signed: true,
182        },
183        "ulequad" => TypeKind::Quad {
184            endian: Endianness::Little,
185            signed: false,
186        },
187        "bequad" => TypeKind::Quad {
188            endian: Endianness::Big,
189            signed: true,
190        },
191        "ubequad" => TypeKind::Quad {
192            endian: Endianness::Big,
193            signed: false,
194        },
195
196        // STRING type
197        "string" => TypeKind::String { max_length: None },
198
199        _ => unreachable!("type_keyword_to_kind called with unknown type: {type_name}"),
200    }
201}
202
#[cfg(test)]
mod tests {
    //! Unit tests for keyword recognition and keyword-to-`TypeKind` mapping.

    use super::*;
    use crate::parser::ast::Endianness;

    // ============================================================
    // Shared helpers
    // ============================================================

    /// Shorthand constructor for a 16-bit `TypeKind`.
    fn short(endian: Endianness, signed: bool) -> TypeKind {
        TypeKind::Short { endian, signed }
    }

    /// Shorthand constructor for a 32-bit `TypeKind`.
    fn long(endian: Endianness, signed: bool) -> TypeKind {
        TypeKind::Long { endian, signed }
    }

    /// Shorthand constructor for a 64-bit `TypeKind`.
    fn quad(endian: Endianness, signed: bool) -> TypeKind {
        TypeKind::Quad { endian, signed }
    }

    /// Every supported keyword paired with the `TypeKind` it must map to.
    ///
    /// This is the single source of truth for the exhaustive tests below;
    /// a newly supported keyword should be added here.
    fn keyword_table() -> Vec<(&'static str, TypeKind)> {
        vec![
            ("byte", TypeKind::Byte { signed: true }),
            ("ubyte", TypeKind::Byte { signed: false }),
            ("short", short(Endianness::Native, true)),
            ("ushort", short(Endianness::Native, false)),
            ("leshort", short(Endianness::Little, true)),
            ("uleshort", short(Endianness::Little, false)),
            ("beshort", short(Endianness::Big, true)),
            ("ubeshort", short(Endianness::Big, false)),
            ("long", long(Endianness::Native, true)),
            ("ulong", long(Endianness::Native, false)),
            ("lelong", long(Endianness::Little, true)),
            ("ulelong", long(Endianness::Little, false)),
            ("belong", long(Endianness::Big, true)),
            ("ubelong", long(Endianness::Big, false)),
            ("quad", quad(Endianness::Native, true)),
            ("uquad", quad(Endianness::Native, false)),
            ("lequad", quad(Endianness::Little, true)),
            ("ulequad", quad(Endianness::Little, false)),
            ("bequad", quad(Endianness::Big, true)),
            ("ubequad", quad(Endianness::Big, false)),
            ("string", TypeKind::String { max_length: None }),
        ]
    }

    /// Assert that `keyword` followed by `" rest"` parses to exactly
    /// `keyword`, leaving `" rest"` unconsumed.
    fn assert_parses_with_rest(keyword: &str) {
        let input = format!("{keyword} rest");
        let (rest, parsed) = parse_type_keyword(&input).unwrap();
        assert_eq!(parsed, keyword, "Failed for input: {keyword}");
        assert_eq!(rest, " rest", "Wrong remaining for input: {keyword}");
    }

    // ============================================================
    // parse_type_keyword tests
    // ============================================================

    #[test]
    fn test_parse_type_keyword_byte_variants() {
        assert_eq!(parse_type_keyword("byte rest"), Ok((" rest", "byte")));
        assert_eq!(parse_type_keyword("ubyte rest"), Ok((" rest", "ubyte")));
    }

    #[test]
    fn test_parse_type_keyword_short_variants() {
        let cases = [
            "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort",
        ];
        for keyword in cases {
            assert_parses_with_rest(keyword);
        }
    }

    #[test]
    fn test_parse_type_keyword_long_variants() {
        let cases = ["long", "ulong", "lelong", "ulelong", "belong", "ubelong"];
        for keyword in cases {
            assert_parses_with_rest(keyword);
        }
    }

    #[test]
    fn test_parse_type_keyword_quad_variants() {
        let cases = ["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"];
        for keyword in cases {
            assert_parses_with_rest(keyword);
        }
    }

    #[test]
    fn test_parse_type_keyword_string() {
        assert_eq!(parse_type_keyword("string rest"), Ok((" rest", "string")));
    }

    #[test]
    fn test_parse_type_keyword_unknown() {
        assert!(parse_type_keyword("unknown rest").is_err());
    }

    #[test]
    fn test_parse_type_keyword_empty() {
        assert!(parse_type_keyword("").is_err());
    }

    #[test]
    fn test_parse_type_keyword_rejects_invalid_prefix_combinations() {
        // Endian prefixes are not defined for `byte`, and `string` takes no
        // prefix at all, so none of these may be recognized.
        for input in ["lebyte", "bebyte", "ubebyte", "ustring", "bestring"] {
            assert!(
                parse_type_keyword(input).is_err(),
                "{input} should not parse as a type keyword"
            );
        }
    }

    #[test]
    fn test_parse_type_keyword_is_case_sensitive() {
        for input in ["BYTE", "Short", "beLong", "STRING"] {
            assert!(
                parse_type_keyword(input).is_err(),
                "{input} should not parse as a type keyword"
            );
        }
    }

    #[test]
    fn test_parse_type_keyword_does_not_skip_leading_whitespace() {
        assert!(parse_type_keyword(" byte").is_err());
        assert!(parse_type_keyword("\tlong").is_err());
    }

    // ============================================================
    // type_keyword_to_kind tests
    // ============================================================

    #[test]
    fn test_type_keyword_to_kind_byte() {
        assert_eq!(
            type_keyword_to_kind("byte"),
            TypeKind::Byte { signed: true }
        );
        assert_eq!(
            type_keyword_to_kind("ubyte"),
            TypeKind::Byte { signed: false }
        );
    }

    #[test]
    fn test_type_keyword_to_kind_short_endianness() {
        assert_eq!(
            type_keyword_to_kind("short"),
            short(Endianness::Native, true)
        );
        assert_eq!(
            type_keyword_to_kind("leshort"),
            short(Endianness::Little, true)
        );
        assert_eq!(
            type_keyword_to_kind("beshort"),
            short(Endianness::Big, true)
        );
    }

    #[test]
    fn test_type_keyword_to_kind_unsigned_variants() {
        assert_eq!(
            type_keyword_to_kind("ushort"),
            short(Endianness::Native, false)
        );
        assert_eq!(
            type_keyword_to_kind("ulong"),
            long(Endianness::Native, false)
        );
        assert_eq!(
            type_keyword_to_kind("uquad"),
            quad(Endianness::Native, false)
        );
    }

    #[test]
    fn test_type_keyword_to_kind_signed_defaults() {
        // libmagic types are signed by default
        assert_eq!(
            type_keyword_to_kind("long"),
            long(Endianness::Native, true)
        );
        assert_eq!(
            type_keyword_to_kind("quad"),
            quad(Endianness::Native, true)
        );
    }

    #[test]
    fn test_type_keyword_to_kind_string() {
        assert_eq!(
            type_keyword_to_kind("string"),
            TypeKind::String { max_length: None }
        );
    }

    #[test]
    fn test_type_keyword_to_kind_exhaustive_mapping() {
        // Pins the signedness and endianness of every keyword, including
        // the combined unsigned + endian spellings.
        for (keyword, expected) in keyword_table() {
            assert_eq!(
                type_keyword_to_kind(keyword),
                expected,
                "Wrong TypeKind for keyword: {keyword}"
            );
        }
    }

    #[test]
    #[should_panic(expected = "type_keyword_to_kind called with unknown type")]
    fn test_type_keyword_to_kind_unknown_panics() {
        let _ = type_keyword_to_kind("float");
    }

    #[test]
    #[should_panic(expected = "type_keyword_to_kind called with unknown type")]
    fn test_type_keyword_to_kind_invalid_prefix_panics() {
        // `byte` has no endian variants, so this must not map to anything.
        let _ = type_keyword_to_kind("lebyte");
    }

    #[test]
    fn test_roundtrip_all_keywords() {
        // Every keyword accepted by parse_type_keyword must be consumed in
        // full and then convert to the expected TypeKind.
        for (keyword, expected) in keyword_table() {
            let (rest, parsed) = parse_type_keyword(keyword).unwrap();
            assert_eq!(rest, "", "Keyword {keyword} should consume all input");
            assert_eq!(parsed, keyword, "Keyword {keyword} parsed incorrectly");
            assert_eq!(
                type_keyword_to_kind(parsed),
                expected,
                "Roundtrip produced wrong TypeKind for: {keyword}"
            );
        }
    }
}