use nom::{IResult, Parser, branch::alt, bytes::complete::tag};
use crate::parser::ast::{Endianness, MetaType, PStringLengthWidth, TypeKind};
/// Error produced by [`type_keyword_to_kind`] when the given keyword is not
/// one of the recognized type keywords.
///
/// The `Display` output is `unknown type keyword: <keyword>`.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[non_exhaustive]
#[error("unknown type keyword: {keyword}")]
pub struct UnknownTypeKeyword {
/// The offending keyword, copied verbatim from the caller's input.
pub keyword: String,
}
pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
alt((
alt((
tag("ubequad"),
tag("ulequad"),
tag("uquad"),
tag("bequad"),
tag("lequad"),
tag("quad"),
)),
alt((
tag("ubelong"),
tag("ulelong"),
tag("ulong"),
tag("belong"),
tag("lelong"),
tag("long"),
)),
alt((
tag("ubeshort"),
tag("uleshort"),
tag("ushort"),
tag("beshort"),
tag("leshort"),
tag("short"),
)),
alt((tag("ubyte"), tag("byte"))),
alt((
tag("bedouble"),
tag("ledouble"),
tag("double"),
tag("befloat"),
tag("lefloat"),
tag("float"),
)),
alt((
tag("beqldate"),
tag("leqldate"),
tag("beqdate"),
tag("leqdate"),
tag("qldate"),
tag("qdate"),
tag("beldate"),
tag("leldate"),
tag("bedate"),
tag("ldate"),
tag("ledate"),
tag("date"),
)),
alt((
tag("lestring16"),
tag("bestring16"),
tag("pstring"),
tag("search"),
tag("regex"),
tag("string"),
)),
alt((
tag("indirect"),
tag("default"),
tag("offset"),
tag("clear"),
tag("name"),
tag("use"),
)),
))
.parse(input)
}
pub fn type_keyword_to_kind(type_name: &str) -> Result<Option<TypeKind>, UnknownTypeKeyword> {
if matches!(type_name, "regex" | "search" | "name" | "use") {
return Ok(None);
}
match type_name {
"default" => return Ok(Some(TypeKind::Meta(MetaType::Default))),
"clear" => return Ok(Some(TypeKind::Meta(MetaType::Clear))),
"indirect" => return Ok(Some(TypeKind::Meta(MetaType::Indirect))),
"offset" => return Ok(Some(TypeKind::Meta(MetaType::Offset))),
_ => {}
}
if let Some(kind) = byte_family(type_name)
.or_else(|| short_family(type_name))
.or_else(|| long_family(type_name))
.or_else(|| quad_family(type_name))
.or_else(|| float_family(type_name))
.or_else(|| double_family(type_name))
.or_else(|| date_family(type_name))
.or_else(|| qdate_family(type_name))
.or_else(|| string_family(type_name))
.or_else(|| string16_family(type_name))
{
return Ok(Some(kind));
}
Err(UnknownTypeKeyword {
keyword: type_name.to_string(),
})
}
/// Resolves `byte` (signed) and `ubyte` (unsigned) to [`TypeKind::Byte`];
/// any other name yields `None`.
fn byte_family(name: &str) -> Option<TypeKind> {
    let signed = match name {
        "ubyte" => false,
        "byte" => true,
        _ => return None,
    };
    Some(TypeKind::Byte { signed })
}
/// Resolves the six `short` keywords to [`TypeKind::Short`].
///
/// A leading `u` marks the unsigned variant; the rest of the name selects the
/// byte order (`short` native, `leshort` little, `beshort` big). Any other
/// name yields `None`.
fn short_family(name: &str) -> Option<TypeKind> {
    // Peel off the optional unsigned marker, then classify what remains.
    let (base, signed) = match name.strip_prefix('u') {
        Some(rest) => (rest, false),
        None => (name, true),
    };
    let endian = match base {
        "short" => Endianness::Native,
        "leshort" => Endianness::Little,
        "beshort" => Endianness::Big,
        _ => return None,
    };
    Some(TypeKind::Short { endian, signed })
}
fn long_family(name: &str) -> Option<TypeKind> {
let (endian, signed) = match name {
"long" => (Endianness::Native, true),
"ulong" => (Endianness::Native, false),
"lelong" => (Endianness::Little, true),
"ulelong" => (Endianness::Little, false),
"belong" => (Endianness::Big, true),
"ubelong" => (Endianness::Big, false),
_ => return None,
};
Some(TypeKind::Long { endian, signed })
}
/// Resolves the six `quad` keywords to [`TypeKind::Quad`].
///
/// The text before the `quad` suffix encodes signedness (`u` = unsigned) and
/// byte order (`le`/`be`, none = native). Any other name yields `None`.
fn quad_family(name: &str) -> Option<TypeKind> {
    // Everything in this family ends in "quad"; only the prefix varies.
    let modifiers = name.strip_suffix("quad")?;
    let (endian, signed) = match modifiers {
        "" => (Endianness::Native, true),
        "u" => (Endianness::Native, false),
        "le" => (Endianness::Little, true),
        "ule" => (Endianness::Little, false),
        "be" => (Endianness::Big, true),
        "ube" => (Endianness::Big, false),
        _ => return None,
    };
    Some(TypeKind::Quad { endian, signed })
}
/// Resolves `float`, `befloat` and `lefloat` to [`TypeKind::Float`] with
/// native, big and little endianness respectively; any other name yields
/// `None`.
fn float_family(name: &str) -> Option<TypeKind> {
    // The byte-order marker, if any, is whatever precedes "float".
    let endian = match name.strip_suffix("float")? {
        "" => Endianness::Native,
        "be" => Endianness::Big,
        "le" => Endianness::Little,
        _ => return None,
    };
    Some(TypeKind::Float { endian })
}
/// Resolves `double`, `bedouble` and `ledouble` to [`TypeKind::Double`] with
/// native, big and little endianness respectively; any other name yields
/// `None`.
fn double_family(name: &str) -> Option<TypeKind> {
    let endian = if name == "double" {
        Endianness::Native
    } else if name == "bedouble" {
        Endianness::Big
    } else if name == "ledouble" {
        Endianness::Little
    } else {
        return None;
    };
    Some(TypeKind::Double { endian })
}
/// Resolves the six `date` keywords to [`TypeKind::Date`].
///
/// Names ending in `ldate` set `utc: false`; names ending in plain `date`
/// set `utc: true`. The remaining prefix selects the byte order (empty =
/// native, `be` = big, `le` = little). Any other name yields `None`.
fn date_family(name: &str) -> Option<TypeKind> {
    // Try the longer "ldate" suffix first so the `l` marker is not mistaken
    // for part of the endian prefix.
    let (prefix, utc) = match name.strip_suffix("ldate") {
        Some(prefix) => (prefix, false),
        None => (name.strip_suffix("date")?, true),
    };
    let endian = match prefix {
        "" => Endianness::Native,
        "be" => Endianness::Big,
        "le" => Endianness::Little,
        _ => return None,
    };
    Some(TypeKind::Date { endian, utc })
}
fn qdate_family(name: &str) -> Option<TypeKind> {
let (endian, utc) = match name {
"qdate" => (Endianness::Native, true),
"qldate" => (Endianness::Native, false),
"beqdate" => (Endianness::Big, true),
"beqldate" => (Endianness::Big, false),
"leqdate" => (Endianness::Little, true),
"leqldate" => (Endianness::Little, false),
_ => return None,
};
Some(TypeKind::QDate { endian, utc })
}
/// Resolves `string` and `pstring` to their default [`TypeKind`] values.
///
/// `string` has no maximum length. `pstring` has no maximum length, a
/// one-byte length prefix, and a length that does not count the prefix
/// itself. Any other name yields `None`.
fn string_family(name: &str) -> Option<TypeKind> {
    if name == "string" {
        return Some(TypeKind::String { max_length: None });
    }
    if name == "pstring" {
        return Some(TypeKind::PString {
            max_length: None,
            length_width: PStringLengthWidth::OneByte,
            length_includes_itself: false,
        });
    }
    None
}
/// Resolves `lestring16` and `bestring16` to [`TypeKind::String16`] with
/// little and big endianness respectively; any other name yields `None`.
fn string16_family(name: &str) -> Option<TypeKind> {
    let endian = match name {
        "bestring16" => Endianness::Big,
        "lestring16" => Endianness::Little,
        _ => return None,
    };
    Some(TypeKind::String16 { endian })
}
// Unit tests for `parse_type_keyword` (keyword recognition and how much input
// it consumes) and `type_keyword_to_kind` (keyword -> `TypeKind` mapping).
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::ast::Endianness;
// `byte` / `ubyte` are recognized and the trailing " rest" is left unconsumed.
#[test]
fn test_parse_type_keyword_byte_variants() {
assert_eq!(parse_type_keyword("byte rest"), Ok((" rest", "byte")));
assert_eq!(parse_type_keyword("ubyte rest"), Ok((" rest", "ubyte")));
}
// Every `short` variant parses to itself, leaving " rest" behind.
#[test]
fn test_parse_type_keyword_short_variants() {
let cases = [
("short", "short"),
("ushort", "ushort"),
("leshort", "leshort"),
("uleshort", "uleshort"),
("beshort", "beshort"),
("ubeshort", "ubeshort"),
];
for (input, expected) in cases {
let input_with_rest = format!("{input} rest");
let (rest, keyword) = parse_type_keyword(&input_with_rest).unwrap();
assert_eq!(keyword, expected, "Failed for input: {input}");
assert_eq!(rest, " rest", "Wrong remaining for input: {input}");
}
}
// Every `long` variant parses to itself, leaving " rest" behind.
#[test]
fn test_parse_type_keyword_long_variants() {
let cases = ["long", "ulong", "lelong", "ulelong", "belong", "ubelong"];
for input in cases {
let input_with_rest = format!("{input} rest");
let (rest, keyword) = parse_type_keyword(&input_with_rest).unwrap();
assert_eq!(keyword, input, "Failed for: {input}");
assert_eq!(rest, " rest");
}
}
// Every `quad` variant parses to itself, leaving " rest" behind.
#[test]
fn test_parse_type_keyword_quad_variants() {
let cases = ["quad", "uquad", "lequad", "ulequad", "bequad", "ubequad"];
for input in cases {
let input_with_rest = format!("{input} rest");
let (rest, keyword) = parse_type_keyword(&input_with_rest).unwrap();
assert_eq!(keyword, input, "Failed for: {input}");
assert_eq!(rest, " rest");
}
}
// Plain `string` is recognized.
#[test]
fn test_parse_type_keyword_string() {
assert_eq!(parse_type_keyword("string rest"), Ok((" rest", "string")));
}
// Input that starts with no known keyword is a parse error.
#[test]
fn test_parse_type_keyword_unknown() {
assert!(parse_type_keyword("unknown rest").is_err());
}
// Empty input is a parse error.
#[test]
fn test_parse_type_keyword_empty() {
assert!(parse_type_keyword("").is_err());
}
// `byte` maps to a signed byte, `ubyte` to an unsigned byte.
#[test]
fn test_type_keyword_to_kind_byte() {
assert_eq!(
type_keyword_to_kind("byte"),
Ok(Some(TypeKind::Byte { signed: true }))
);
assert_eq!(
type_keyword_to_kind("ubyte"),
Ok(Some(TypeKind::Byte { signed: false }))
);
}
// The `le` / `be` prefixes select little / big endian; no prefix is native.
#[test]
fn test_type_keyword_to_kind_short_endianness() {
assert_eq!(
type_keyword_to_kind("short"),
Ok(Some(TypeKind::Short {
endian: Endianness::Native,
signed: true
}))
);
assert_eq!(
type_keyword_to_kind("leshort"),
Ok(Some(TypeKind::Short {
endian: Endianness::Little,
signed: true
}))
);
assert_eq!(
type_keyword_to_kind("beshort"),
Ok(Some(TypeKind::Short {
endian: Endianness::Big,
signed: true
}))
);
}
// A leading `u` yields `signed: false` for short, long and quad.
#[test]
fn test_type_keyword_to_kind_unsigned_variants() {
assert_eq!(
type_keyword_to_kind("ushort"),
Ok(Some(TypeKind::Short {
endian: Endianness::Native,
signed: false
}))
);
assert_eq!(
type_keyword_to_kind("ulong"),
Ok(Some(TypeKind::Long {
endian: Endianness::Native,
signed: false
}))
);
assert_eq!(
type_keyword_to_kind("uquad"),
Ok(Some(TypeKind::Quad {
endian: Endianness::Native,
signed: false
}))
);
}
// Unprefixed `long` and `quad` are signed and native-endian.
#[test]
fn test_type_keyword_to_kind_signed_defaults() {
assert_eq!(
type_keyword_to_kind("long"),
Ok(Some(TypeKind::Long {
endian: Endianness::Native,
signed: true
}))
);
assert_eq!(
type_keyword_to_kind("quad"),
Ok(Some(TypeKind::Quad {
endian: Endianness::Native,
signed: true
}))
);
}
// `string` maps to a String kind with no maximum length.
#[test]
fn test_type_keyword_to_kind_string() {
assert_eq!(
type_keyword_to_kind("string"),
Ok(Some(TypeKind::String { max_length: None }))
);
}
// `pstring` is recognized by the keyword parser.
#[test]
fn test_parse_type_keyword_pstring() {
assert_eq!(parse_type_keyword("pstring rest"), Ok((" rest", "pstring")));
}
// `pstring` maps to the default PString configuration.
#[test]
fn test_type_keyword_to_kind_pstring() {
assert_eq!(
type_keyword_to_kind("pstring"),
Ok(Some(TypeKind::PString {
max_length: None,
length_width: PStringLengthWidth::OneByte,
length_includes_itself: false
}))
);
}
// `regex` and `search` are known keywords that produce no TypeKind here.
#[test]
fn test_type_keyword_to_kind_regex_and_search_return_none() {
assert_eq!(type_keyword_to_kind("regex"), Ok(None));
assert_eq!(type_keyword_to_kind("search"), Ok(None));
}
// Unknown keywords produce an error that carries and displays the keyword.
#[test]
fn test_type_keyword_to_kind_unknown_returns_err() {
let err = type_keyword_to_kind("nonexistent").expect_err("unknown keyword must return Err");
assert_eq!(err.keyword, "nonexistent");
assert!(err.to_string().contains("nonexistent"));
}
// Checks the individual default fields of the `pstring` kind
// (`length_includes_itself` is deliberately not inspected here).
#[test]
fn test_pstring_keyword_defaults_to_one_byte_width() {
let kind = type_keyword_to_kind("pstring")
.expect("pstring is a known keyword")
.expect("pstring maps to Some(TypeKind)");
match kind {
TypeKind::PString {
max_length,
length_width,
length_includes_itself: _,
} => {
assert_eq!(
max_length, None,
"pstring default should have no max_length"
);
assert_eq!(
length_width,
PStringLengthWidth::OneByte,
"pstring default should be OneByte"
);
}
_ => panic!("Expected TypeKind::PString, got {kind:?}"),
}
}
// A `/H` suffix after `pstring` stays in the remaining input.
#[test]
fn test_pstring_keyword_does_not_consume_suffix() {
let (rest, keyword) = parse_type_keyword("pstring/H =value").unwrap();
assert_eq!(keyword, "pstring");
assert_eq!(
rest, "/H =value",
"Suffix should remain unconsumed by type keyword parser"
);
}
// A keyword that is the entire input leaves an empty remainder.
#[test]
fn test_pstring_keyword_boundary() {
let (rest, keyword) = parse_type_keyword("pstring").unwrap();
assert_eq!(keyword, "pstring");
assert_eq!(rest, "");
}
// Whitespace and the operator following the keyword are not consumed.
#[test]
fn test_pstring_before_operator() {
let (rest, keyword) = parse_type_keyword("pstring =hello").unwrap();
assert_eq!(keyword, "pstring");
assert_eq!(rest, " =hello");
}
// Both 16-bit string keywords are recognized in full.
#[test]
fn test_parse_type_keyword_string16_variants() {
let (rest, kw) = parse_type_keyword("lestring16 rest").unwrap();
assert_eq!(kw, "lestring16");
assert_eq!(rest, " rest");
let (rest, kw) = parse_type_keyword("bestring16 rest").unwrap();
assert_eq!(kw, "bestring16");
assert_eq!(rest, " rest");
}
// `lestring16` / `bestring16` map to String16 with little / big endianness.
#[test]
fn test_string16_keyword_to_kind() {
assert_eq!(
type_keyword_to_kind("lestring16"),
Ok(Some(TypeKind::String16 {
endian: Endianness::Little,
}))
);
assert_eq!(
type_keyword_to_kind("bestring16"),
Ok(Some(TypeKind::String16 {
endian: Endianness::Big,
}))
);
}
// Cross-checks the parser against the mapper: every keyword listed below must
// be consumed in full by `parse_type_keyword`, and the parsed slice must then
// map to `Ok(Some(_))` (first list) or `Ok(None)` (second list).
#[test]
fn test_roundtrip_all_keywords() {
let convertible_keywords = [
"byte",
"ubyte",
"short",
"ushort",
"leshort",
"uleshort",
"beshort",
"ubeshort",
"long",
"ulong",
"lelong",
"ulelong",
"belong",
"ubelong",
"quad",
"uquad",
"lequad",
"ulequad",
"bequad",
"ubequad",
"float",
"befloat",
"lefloat",
"double",
"bedouble",
"ledouble",
"date",
"ldate",
"bedate",
"beldate",
"ledate",
"leldate",
"qdate",
"qldate",
"beqdate",
"beqldate",
"leqdate",
"leqldate",
"pstring",
"string",
"lestring16",
"bestring16",
"default",
"clear",
"indirect",
"offset",
];
for keyword in convertible_keywords {
let (rest, parsed) = parse_type_keyword(keyword).unwrap();
assert_eq!(rest, "", "Keyword {keyword} should consume all input");
assert!(
type_keyword_to_kind(parsed).is_ok_and(|o| o.is_some()),
"{keyword} should map to Ok(Some(TypeKind))"
);
}
// Keywords the parser accepts but that have no TypeKind in this mapping.
for keyword in ["regex", "search", "name", "use"] {
let (rest, parsed) = parse_type_keyword(keyword).unwrap();
assert_eq!(rest, "", "Keyword {keyword} should consume all input");
assert_eq!(
type_keyword_to_kind(parsed),
Ok(None),
"{keyword} should return Ok(None) from keyword-to-kind"
);
}
}
}