1use crate::DataLiteral;
2use cervine::Cow;
3use gnaw::Unshift as _;
4use lazy_transform_str::{Transform as _, TransformedPart};
5use logos::Logos;
6use smartstring::alias::String;
7use std::{
8 fmt::{Display, Formatter, Result as fmtResult},
9 iter,
10 ops::Range,
11};
12use tap::Tap;
13
/// A `<encoding:data>` literal that was recognised but is invalid as written
/// (it contains a verbatim carriage return where the `\r` escape is required).
///
/// Both halves are kept raw (still escaped), together with their source spans,
/// so errors can be reported against the original text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvalidDataLiteral<'a, Position> {
    /// Raw encoding segment as written; for the backticked form this still
    /// includes the surrounding backticks (see the lexer callback).
    pub encoding: &'a str,
    /// Source span of the encoding segment.
    pub encoding_span: Range<Position>,
    /// Raw, still-escaped data segment as written.
    pub unencoded_data: &'a str,
    /// Source span of the data segment.
    pub unencoded_data_span: Range<Position>,
}
24
25#[must_use = "pure function"]
26pub fn escape_unencoded_data(string: &str) -> Cow<String, str> {
27 string.transform(|rest| match rest.unshift().unwrap() {
28 c @ ('\\' | '>') => {
29 let mut changed = String::from(r"\");
30 changed.push(c);
31 TransformedPart::Changed(changed)
32 }
33 '\r' => TransformedPart::Changed("\\r".into()),
34 _ => TransformedPart::Unchanged,
35 })
36}
37
/// Defines an escaping function `$name(string: &str) -> Cow<String, str>`
/// delimited by `$delimiter`:
///
/// * `\` and the delimiter are backslash-escaped; a carriage return becomes
///   the two-character sequence `\r`.
/// * The result is wrapped in delimiter quotes when `$always_quote` is
///   `true`, when the string is empty, when it starts with `-` or a digit
///   (and would otherwise lex as a number), or when it contains any
///   character outside the bare-identifier set `[a-zA-Z0-9_-]`.
macro_rules! define_escape {
    ($name:ident, delimiter = $delimiter:literal, always_quote = $always_quote:literal) => {
        fn $name(string: &str) -> Cow<String, str> {
            // Quote up front when forced to, when empty, or when the first
            // character would make the bare form lex as a number.
            let mut quote = $always_quote
                || match string.chars().next() {
                    Some(first) => first == '-' || first.is_ascii_digit(),
                    None => true,
                };
            let escaped_name = string.transform(|rest| match rest.unshift().unwrap() {
                c @ ('\\' | $delimiter) => {
                    quote = true;
                    let mut changed = String::from(r"\");
                    changed.push(c);
                    TransformedPart::Changed(changed)
                }
                '\r' => {
                    quote = true;
                    TransformedPart::Changed("\\r".into())
                }
                c => {
                    // Any character outside [a-zA-Z0-9_-] forces quoting.
                    // (Fix: the previous half-open range `'0'..'9'` wrongly
                    // excluded '9', so names containing '9' were quoted even
                    // though the bare-identifier lexer rule accepts them.)
                    if !(c.is_ascii_alphanumeric() || c == '-' || c == '_') {
                        quote = true
                    }
                    TransformedPart::Unchanged
                }
            });
            if quote {
                let mut quoted = String::from(concat!($delimiter));
                quoted.push_str(&escaped_name);
                quoted.push($delimiter);
                Cow::Owned(quoted)
            } else {
                escaped_name
            }
        }
    };
}
79
// `"`-delimited escaping for string literals; always emits the surrounding quotes.
define_escape!(escape_string, delimiter = '"', always_quote = true);
// Backtick-delimited escaping for identifiers; quotes only when the name is
// not a bare identifier.
define_escape!(escape_identifier, delimiter = '`', always_quote = false);
82
83fn unescape_verbatim_and_r_to_carriage_return(string: &str) -> Cow<String, str> {
84 let mut escaped = false;
85 string.transform(|rest| {
86 match rest.unshift().unwrap() {
87 '\\' if !escaped => {
88 escaped = true;
89 return TransformedPart::Changed(String::new());
90 }
91 'r' if escaped => TransformedPart::Changed("\r".into()),
92 _ => {
93 TransformedPart::Unchanged
95 }
96 }
97 .tap(|_| escaped = false)
98 })
99}
100
/// Trims redundant trailing `'0'` characters from a numeric literal's textual
/// form, e.g. `"0.1000"` → `"0.1"` and `"0.0500"` → `"0.05"`.
///
/// A `'0'` is only removed while the byte before it is also an ASCII digit,
/// so the final digit after a decimal point survives (`"0.0"` stays `"0.0"`)
/// and strings shorter than two bytes are returned unchanged.
fn trim_trailing_0s(mut s: &str) -> &str {
    // Slice pattern: matches while `s` ends in a digit followed by '0'.
    while let [.., prev, b'0'] = s.as_bytes() {
        if !prev.is_ascii_digit() {
            break;
        }
        // Dropping a single ASCII byte keeps the slice valid UTF-8.
        s = &s[..s.len() - 1];
    }
    s
}
110
/// One lexical token of a TAML document, produced by the [`Logos`]-derived
/// lexer.
///
/// The `Invalid…` variants capture near-miss spellings (a verbatim carriage
/// return where the `\r` escape is required, zero-prefixed numbers) so
/// callers can report them precisely instead of receiving a bare
/// [`Token::Error`].
#[derive(Logos, Debug, Clone, PartialEq, Eq)]
#[logos(type Position = usize)]
pub enum Token<'a, Position> {
    /// `// …` line comment; the callback strips the leading `//` and any
    /// trailing spaces/tabs.
    #[regex(r"//[^\r\n]+", |lex| lex.slice()[2..].trim_end_matches([' ', '\t'].as_ref()))]
    Comment(&'a str),

    /// A run of `#` characters (heading depth marker); payload is the count.
    #[regex("#+", |lex| lex.slice().chars().count())]
    HeadingHashes(usize),

    /// Line break: `\n` or `\r\n`.
    #[regex("\r?\n")]
    Newline,

    /// `[`
    #[token("[")]
    Brac,
    /// `]`
    #[token("]")]
    Ket,

    /// `{`
    #[token("{")]
    Bra,
    /// `}`
    #[token("}")]
    Ce,

    /// `(`
    #[token("(")]
    Paren,
    /// `)`
    #[token(")")]
    Thesis,

    /// `,`
    #[token(",")]
    Comma,

    /// `.`
    #[token(".")]
    Period,

    /// A `"…"`-quoted string with its `\\`, `\"` and `\r` escapes resolved.
    /// Takes precedence (priority 1000) over the invalid variant below.
    #[regex(r#""([^\\"\r]|\\\\|\\"|\\r)*""#, priority = 1000, callback = |lex| unescape_verbatim_and_r_to_carriage_return(&lex.slice()[1..lex.slice().len() - 1]))]
    String(Cow<'a, String, str>),

    /// Like [`Token::String`] but containing a verbatim carriage return,
    /// which is invalid; the raw (still escaped) contents are preserved for
    /// error reporting.
    #[regex(r#""([^\\"]|\\\\|\\"|\\r)*""#, |lex| &lex.slice()[1..lex.slice().len() - 1])]
    InvalidStringWithVerbatimCarriageReturn(&'a str),

    /// `<encoding:data>` with a bare-identifier encoding.
    #[regex(r#"<[a-zA-Z_][a-zA-Z\-_0-9]*:([^\\>]|\\\\|\\>)*>"#, |lex| {
        // Split the `<…>` interior at the first ':' into encoding and payload.
        let (encoding, unencoded_data) = lex.slice()['<'.len_utf8()..lex.slice().len() - '>'.len_utf8()].split_once(':').unwrap();
        DataLiteral {
            encoding: Cow::Borrowed(encoding),
            encoding_span: lex.span().start + '<'.len_utf8()..lex.span().start + '<'.len_utf8() + encoding.len(),
            unencoded_data: unescape_verbatim_and_r_to_carriage_return(unencoded_data),
            unencoded_data_span: lex.span().end - 1 - unencoded_data.len()..lex.span().end - 1,
        }
    })]
    /// `` <`encoding`:data> `` with a backtick-quoted encoding.
    #[regex(r#"<`([^\\`\r]|\\\\|\\`|\\r)*`:([^\\>\r]|\\\\|\\>|\\r)*>"#, priority = 1000, callback = |lex| {
        // NOTE(review): `split_once(':')` splits at the FIRST colon, but the
        // backticked encoding regex admits ':' inside the backticks, which
        // would mis-split here — confirm whether encodings may contain ':'.
        let (encoding, unencoded_data) = lex.slice()['<'.len_utf8()..lex.slice().len() - '>'.len_utf8()].split_once(':').unwrap();
        DataLiteral {
            // `encoding` still includes its surrounding backticks at this
            // point; they are sliced off before unescaping.
            encoding: unescape_verbatim_and_r_to_carriage_return(&encoding['`'.len_utf8()..encoding.len()-'`'.len_utf8()]),
            // NOTE(review): this span starts at `span.start + 1`, i.e. AT the
            // opening backtick rather than after `<`'s following backtick, and
            // `encoding.len()` still counts both backticks — confirm which
            // span convention is intended (the bare form above differs).
            encoding_span: lex.span().start + '`'.len_utf8()..lex.span().start + '`'.len_utf8() + encoding.len(),
            unencoded_data: unescape_verbatim_and_r_to_carriage_return(unencoded_data),
            unencoded_data_span: lex.span().end - '>'.len_utf8() - unencoded_data.len()..lex.span().end - '>'.len_utf8(),
        }
    })]
    DataLiteral(DataLiteral<'a, Position>),

    /// Like the backticked [`Token::DataLiteral`] form but containing a
    /// verbatim carriage return; both halves are kept raw for reporting.
    #[regex(r#"<`([^\\`]|\\\\|\\`|\\r)*`:([^\\>]|\\\\|\\>|\\r)*>"#, |lex| {
        let (encoding, unencoded_data) = lex.slice()[1..lex.slice().len() - 1].split_once(':').unwrap();
        InvalidDataLiteral {
            // Raw form: `encoding` keeps its surrounding backticks.
            encoding,
            encoding_span: lex.span().start + '`'.len_utf8()..lex.span().start + '`'.len_utf8() + encoding.len(),
            unencoded_data,
            unencoded_data_span: lex.span().end - '>'.len_utf8() - unencoded_data.len()..lex.span().end - '>'.len_utf8(),
        }
    })]
    InvalidDataLiteralWithVerbatimCarriageReturn(InvalidDataLiteral<'a, Position>),

    /// Decimal number; redundant trailing zeros are trimmed (`0.1000` → `0.1`).
    #[regex(r"-?(0|[1-9]\d*)\.\d+", |lex| trim_trailing_0s(lex.slice()))]
    Decimal(&'a str),

    /// Decimal whose integer part has a superfluous leading zero — invalid.
    #[regex(r"-?(0\d+)\.\d+", |lex| trim_trailing_0s(lex.slice()))]
    InvalidZeroPrefixedDecimal(&'a str),

    /// Integer without a superfluous leading zero.
    #[regex(r"-?(0|[1-9]\d*)", |lex| lex.slice())]
    Integer(&'a str),

    /// Integer with a superfluous leading zero (e.g. `007`) — invalid.
    #[regex(r"-?(0\d+)", |lex| lex.slice())]
    InvalidZeroPrefixedInteger(&'a str),

    /// `:`
    #[token(":")]
    Colon,

    /// A bare identifier, or a backtick-quoted one with its `\\`, `` \` ``
    /// and `\r` escapes resolved (quoted form wins via priority 1000).
    #[regex(r"[a-zA-Z_][a-zA-Z\-_0-9]*", |lex| Cow::Borrowed(lex.slice()))]
    #[regex(r"`([^\\`\r]|\\\\|\\`|\\r)*`", priority = 1000, callback = |lex| unescape_verbatim_and_r_to_carriage_return(&lex.slice()['`'.len_utf8()..lex.slice().len() - '`'.len_utf8()]))]
    Identifier(Cow<'a, String, str>),

    /// Quoted identifier containing a verbatim carriage return — invalid;
    /// raw (still escaped) contents are preserved.
    /// NOTE(review): this regex is identical to the valid quoted-identifier
    /// regex above (it still excludes verbatim `\r`), so this variant looks
    /// unreachable; the parallel string/data-literal variants drop `\r` from
    /// the negated class — confirm whether `[^\\`]` was intended here.
    #[regex(r"`([^\\`\r]|\\\\|\\`|\\r)*`", |lex| lex.slice()['`'.len_utf8()..lex.slice().len() - '`'.len_utf8()])]
    InvalidIdentifierWithVerbatimCarriageReturn(&'a str),

    /// Lexing failure; also the sink for skipped runs of spaces and tabs.
    #[error]
    #[regex(r"[ \t]+", logos::skip)]
    Error,
}
210
211impl<'a, Position> Display for Token<'a, Position> {
218 fn fmt(&self, f: &mut Formatter<'_>) -> fmtResult {
219 match self {
220 Token::Comment(str) => write!(f, "//{}", str),
221 Token::HeadingHashes(count) => {
222 write!(f, "{}", iter::repeat('#').take(*count).collect::<String>())
223 }
224 Token::Newline => writeln!(f),
225 Token::Brac => write!(f, "["),
226 Token::Ket => write!(f, "]"),
227 Token::Bra => write!(f, "{{"),
228 Token::Ce => write!(f, "}}"),
229 Token::Paren => write!(f, "("),
230 Token::Thesis => write!(f, ")"),
231 Token::Comma => write!(f, ","),
232 Token::Period => write!(f, "."),
233 Token::DataLiteral(DataLiteral {
234 encoding,
235 unencoded_data,
236 ..
237 }) => {
238 write!(
239 f,
240 "<{}:{}>",
241 escape_identifier(encoding),
242 escape_unencoded_data(unencoded_data)
243 )
244 }
245 Self::InvalidDataLiteralWithVerbatimCarriageReturn(invalid_data_literal) => write!(
246 f,
247 "<`{}`:{}>",
248 invalid_data_literal.encoding,
249 invalid_data_literal.unencoded_data ),
251 Token::String(str) => write!(f, "{}", escape_string(str)),
252 Token::InvalidStringWithVerbatimCarriageReturn(str) => write!(f, r#""{}""#, str), Token::Decimal(str)
254 | Token::Integer(str)
255 | Self::InvalidZeroPrefixedDecimal(str)
256 | Token::InvalidZeroPrefixedInteger(str) => write!(f, "{}", str),
257 Token::Colon => write!(f, ":"),
258 Token::Identifier(str) => write!(f, "{}", escape_identifier(str)),
259 Token::InvalidIdentifierWithVerbatimCarriageReturn(str) => write!(f, "`{}`", str), Token::Error => panic!("Tried to `Display::fmt` `taml::token::Token::Error`."),
261 }
262 }
263}
264
/// Smoke test: lexes a representative TAML document and checks the exact
/// token stream, including comment trimming, string unescaping and
/// trailing-zero trimming of decimals.
#[cfg(test)]
#[test]
#[allow(clippy::enum_glob_use)]
fn lex() {
    use Token::*;

    // Leading whitespace inside the document is insignificant: runs of
    // spaces/tabs are skipped by the lexer (`#[regex(r"[ \t]+", logos::skip)]`).
    let source = r#"//This is a comment
    # [[loops].{sound, volume}]
    "$sewer/amb_drips", 0.8
    "$sewer/amb_flies", 0.1000
    "$sewer/amb_hum", 0.0500

    # [moments]
    sound: "$sewer/moments/*"
    layers: 1
    first-interval-no-min: true
    interval-range: (10, 60)
    volume-range: (0.1, 0.15)
    "#;

    let lex = Token::lexer(source);

    let tokens: Vec<_> = lex.collect();

    assert_eq!(
        tokens.as_slice(),
        &[
            // `//` and trailing whitespace are stripped by the Comment callback.
            Comment("This is a comment"),
            Newline,
            HeadingHashes(1),
            Brac,
            Brac,
            Identifier(Cow::Borrowed("loops")),
            Ket,
            Period,
            Bra,
            Identifier(Cow::Borrowed("sound")),
            Comma,
            Identifier(Cow::Borrowed("volume")),
            Ce,
            Ket,
            Newline,
            String(Cow::Borrowed("$sewer/amb_drips")),
            Comma,
            Decimal("0.8"),
            Newline,
            String(Cow::Borrowed("$sewer/amb_flies")),
            Comma,
            // 0.1000 → 0.1: trailing zeros trimmed by `trim_trailing_0s`.
            Decimal("0.1"),
            Newline,
            String(Cow::Borrowed("$sewer/amb_hum")),
            Comma,
            // 0.0500 → 0.05: the zero right after the decimal point survives.
            Decimal("0.05"),
            Newline,
            Newline,
            HeadingHashes(1),
            Brac,
            Identifier(Cow::Borrowed("moments")),
            Ket,
            Newline,
            Identifier(Cow::Borrowed("sound")),
            Colon,
            String(Cow::Borrowed("$sewer/moments/*")),
            Newline,
            Identifier(Cow::Borrowed("layers")),
            Colon,
            Integer("1"),
            Newline,
            Identifier(Cow::Borrowed("first-interval-no-min")),
            Colon,
            // `true` lexes as a plain identifier; there is no boolean token.
            Identifier(Cow::Borrowed("true")),
            Newline,
            Identifier(Cow::Borrowed("interval-range")),
            Colon,
            Paren,
            Integer("10"),
            Comma,
            Integer("60"),
            Thesis,
            Newline,
            Identifier(Cow::Borrowed("volume-range")),
            Colon,
            Paren,
            Decimal("0.1"),
            Comma,
            Decimal("0.15"),
            Thesis,
            Newline
        ][..]
    );
}