1extern crate nom;
2
3use nom::{
4 branch::alt,
5 bytes::complete::{is_not, tag, tag_no_case, take_till, take_until},
6 character::{
7 complete::{alpha1, alphanumeric1, anychar, char, multispace0},
8 is_space,
9 },
10 combinator::{opt, recognize},
11 multi::many0_count,
12 sequence::{pair, tuple},
13 IResult, InputTake,
14};
15use nom_locate::{position, LocatedSpan};
16
17use super::ast::{Node::*, *};
18use super::tag::FUNCTION_TAGS;
19
20type Span<'a> = LocatedSpan<&'a str>;
21
22pub fn parse(input: &str) -> Result<Node, String> {
32 match parse_internal(Span::new(input), None) {
33 Ok((_, children)) => {
34 return Ok(Root(RootNode { children }));
35 }
36 Err(e) => {
37 return Err(format!("Parse error: {}", e));
38 }
39 };
40}
41
42fn take_until_tag(input: Span) -> IResult<Span, Span> {
43 let str = input.to_string();
44 let mut pos = 0usize;
45 loop {
46 match str[pos..].find('<') {
47 Some(index) => {
48 pos += index;
49 let offset = match str.chars().nth(pos + 1) {
50 Some('$') | Some('/') => 1,
51 _ => 0,
52 };
53 let next = &str[pos + offset + 1..pos + offset + 3];
54 if next.eq_ignore_ascii_case("mt") {
55 break;
56 }
57 pos += 1;
58 }
59 None => {
60 pos = str.len();
61 break;
62 }
63 }
64 }
65
66 return Ok(input.take_split(pos));
67}
68
69fn parse_internal<'a>(
70 mut input: Span<'a>,
71 current_tag: Option<String>,
72) -> IResult<Span<'a>, Vec<Node>> {
73 let mut children = vec![];
74
75 while input.len() > 0 {
76 let (_, pos) = position(input)?;
77 let (rest, text) = match opt(take_until_tag)(input)? {
78 (rest, Some(text)) => (rest, text),
79 _ => (Span::new(""), input),
80 };
81
82 if text.len() > 0 {
83 children.push(Text(TextNode {
84 value: text.to_string(),
85 line: pos.location_line(),
86 column: pos.get_utf8_column(),
87 offset: pos.location_offset(),
88 }))
89 }
90
91 if rest.len() == 0 {
92 break;
93 }
94
95 let (_, end_tag) = opt(tag_no_case("</"))(rest)?;
96 if end_tag.is_some() && current_tag.is_some() {
97 let current_tag_str = current_tag.unwrap();
98 let (rest, _) = alt((
99 tag_no_case(format!("</mt:{}>", current_tag_str).as_str()),
100 tag_no_case(format!("</mt{}>", current_tag_str).as_str()),
101 ))(rest)?;
102 input = rest;
103 break;
104 } else {
105 let (rest, node) = parse_tag(rest)?;
106 children.push(node);
107 input = rest;
108 };
109 }
110
111 return Ok((input, children));
112}
113
114fn parse_attribute_values(mut input: Span) -> IResult<Span, Vec<AttributeValue>> {
115 let mut values: Vec<AttributeValue> = vec![];
116
117 while input.len() > 0 {
118 let (_, pos) = position(input)?;
119 let (rest, ch) = opt(alt((char('"'), char('\''))))(input)?;
120 let (rest, value) = match ch {
121 Some(ch) => {
122 let (rest, value) = opt(alt((
123 recognize(tuple((char('<'), take_till(|c| c != '>'), char('>')))),
124 is_not(format!("{}\\", ch).as_str()),
125 )))(rest)?;
126 let (rest, _) = char(ch)(rest)?;
127 (rest, value)
128 }
129 None => opt(take_till(|c| is_space(c as u8)))(rest)?,
130 };
131 values.push(AttributeValue {
132 value: match value {
133 Some(value) => value.to_string(),
134 None => "".to_string(),
135 },
136 line: pos.location_line(),
137 column: pos.get_utf8_column(),
138 offset: pos.location_offset(),
139 });
140
141 input = rest;
142
143 let (rest, separator) = opt(char(','))(rest)?;
144 if separator.is_none() {
145 break;
146 }
147
148 input = rest;
149 }
150
151 Ok((input, values))
152}
153
154fn name_parser(input: Span) -> IResult<Span, Span> {
155 recognize(pair(
156 alt((alpha1, tag("_"))),
157 many0_count(alt((alphanumeric1, tag("_"), tag(":")))),
158 ))(input)
159}
160
161fn parse_attribute(input: Span) -> IResult<Span, Option<Attribute>> {
162 let (rest, _) = multispace0(input)?;
163 let (_, pos) = position(rest)?;
164
165 let (rest, name) = opt(name_parser)(rest)?;
166 let name = match name {
167 Some(name) => name,
168 None => return Ok((input, None)),
169 };
170
171 let (rest, _) = char('=')(rest)?;
172 let (rest, values) = parse_attribute_values(rest)?;
173
174 return Ok((
175 rest,
176 Some(Attribute {
177 name: name.to_string(),
178 values,
179 line: pos.location_line(),
180 column: pos.get_utf8_column(),
181 offset: pos.location_offset(),
182 }),
183 ));
184}
185
186fn parse_attributes(mut input: Span) -> IResult<Span, Vec<Attribute>> {
187 let mut attributes = vec![];
188
189 loop {
190 let (rest, attribute) = parse_attribute(input)?;
191 match attribute {
192 Some(attribute) => {
193 input = rest;
194 attributes.push(attribute)
195 }
196 None => break,
197 }
198 }
199
200 return Ok((input, attributes));
201}
202
203fn parse_tag(input: Span) -> IResult<Span, Node> {
204 let (_, pos) = position(input)?;
205 let (rest, head) = alt((tag_no_case("<mt"), tag_no_case("<$mt")))(input)?;
206 let (rest, _) = opt(char(':'))(rest)?;
207 let (rest, name) = name_parser(rest)?;
208 let (rest, attributes) = parse_attributes(rest)?;
209 let (rest, tail) = take_until(">")(rest)?;
210 let (rest, _) = anychar(rest)?;
211
212 if FUNCTION_TAGS.lock().unwrap().contains(&name.to_lowercase())
213 || &name.to_lowercase() == "else"
214 || &name.to_lowercase() == "elseif"
215 || (tail.len() >= 1
216 && (head.chars().nth(1).unwrap() == '$' || tail.chars().rev().nth(0).unwrap() == '/'))
217 {
218 return Ok((
219 rest,
220 FunctionTag(FunctionTagNode {
221 name: name.to_string(),
222 attributes,
223 line: pos.location_line(),
224 column: pos.get_utf8_column(),
225 offset: pos.location_offset(),
226 }),
227 ));
228 } else {
229 let (rest, children) = parse_internal(rest, Some(name.to_string()))?;
230 return Ok((
231 rest,
232 BlockTag(BlockTagNode {
233 name: name.to_string(),
234 children,
235 attributes,
236 line: pos.location_line(),
237 column: pos.get_utf8_column(),
238 offset: pos.location_offset(),
239 }),
240 ));
241 }
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected `AttributeValue` on line 1 at the given column/offset.
    fn value_at(value: &str, column: usize, offset: usize) -> AttributeValue {
        AttributeValue {
            value: value.to_string(),
            line: 1,
            column,
            offset,
        }
    }

    /// Builds the expected `Attribute` on line 1 at the given column/offset.
    fn attribute_at(
        name: &str,
        column: usize,
        offset: usize,
        values: Vec<AttributeValue>,
    ) -> Attribute {
        Attribute {
            name: name.to_string(),
            values,
            line: 1,
            column,
            offset,
        }
    }

    /// Runs `parse_tag` and checks that the whole source was consumed.
    fn parse_whole_tag(source: &str) -> Node {
        let (rest, node) = parse_tag(Span::new(source)).unwrap();
        assert_eq!(*rest.fragment(), "");
        node
    }

    /// Runs `parse_attribute` and checks that the whole source was consumed.
    fn parse_whole_attribute(source: &str) -> Attribute {
        let (rest, attribute) = parse_attribute(Span::new(source)).unwrap();
        assert_eq!(*rest.fragment(), "");
        attribute.unwrap()
    }

    #[test]
    fn test_parse_blank_attribute() {
        let tag = parse_whole_tag(
            r#"<$mt:Var name="search_link" strip="" trim="1" encode_html="1" setvar="search_link"$>"#,
        );
        let expected = FunctionTag(FunctionTagNode {
            name: "Var".to_string(),
            attributes: vec![
                attribute_at("name", 10, 9, vec![value_at("search_link", 15, 14)]),
                attribute_at("strip", 29, 28, vec![value_at("", 35, 34)]),
                attribute_at("trim", 38, 37, vec![value_at("1", 43, 42)]),
                attribute_at("encode_html", 47, 46, vec![value_at("1", 59, 58)]),
                attribute_at("setvar", 63, 62, vec![value_at("search_link", 70, 69)]),
            ],
            line: 1,
            column: 1,
            offset: 0,
        });
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_if_else() {
        let tag = parse_whole_tag(
            r#"<mt:If name="blog_lang" eq="ja">ja_JP<mt:else><$mt:Var name="blog_lang"$></mt:If>"#,
        );
        let expected = BlockTag(BlockTagNode {
            name: "If".to_string(),
            attributes: vec![
                attribute_at("name", 8, 7, vec![value_at("blog_lang", 13, 12)]),
                attribute_at("eq", 25, 24, vec![value_at("ja", 28, 27)]),
            ],
            line: 1,
            column: 1,
            offset: 0,
            children: vec![
                Text(TextNode {
                    value: "ja_JP".to_string(),
                    line: 1,
                    column: 33,
                    offset: 32,
                }),
                FunctionTag(FunctionTagNode {
                    name: "else".to_string(),
                    attributes: vec![],
                    line: 1,
                    column: 38,
                    offset: 37,
                }),
                FunctionTag(FunctionTagNode {
                    name: "Var".to_string(),
                    attributes: vec![attribute_at(
                        "name",
                        56,
                        55,
                        vec![value_at("blog_lang", 61, 60)],
                    )],
                    line: 1,
                    column: 47,
                    offset: 46,
                }),
            ],
        });
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_tag_function_tag() {
        let tag = parse_whole_tag(r#"<mt:EntryTitle>"#);
        let expected = FunctionTag(FunctionTagNode {
            name: "EntryTitle".to_string(),
            attributes: vec![],
            line: 1,
            column: 1,
            offset: 0,
        });
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_attribute() {
        let attribute = parse_whole_attribute(r#"limit="10""#);
        assert_eq!(attribute.name, "limit");
        assert_eq!(attribute.values, vec![value_at("10", 7, 6)]);
    }

    #[test]
    fn test_parse_attribute_single_quote() {
        let attribute = parse_whole_attribute(r#"limit='10'"#);
        assert_eq!(attribute.name, "limit");
        assert_eq!(attribute.values, vec![value_at("10", 7, 6)]);
    }

    #[test]
    fn test_parse_attribute_replace() {
        let attribute = parse_whole_attribute(r#"replace="a","b""#);
        assert_eq!(attribute.name, "replace");
        assert_eq!(
            attribute.values,
            vec![value_at("a", 9, 8), value_at("b", 13, 12)]
        );
    }
}