1extern crate nom;
2
3use nom::{
4 branch::alt,
5 bytes::complete::{is_not, tag, tag_no_case, take_till, take_until},
6 character::{
7 complete::{alpha1, alphanumeric1, anychar, char, multispace0},
8 },
9 combinator::{opt, recognize},
10 multi::many0_count,
11 sequence::{pair, tuple},
12 IResult, InputTake,
13};
14use nom_locate::{position, LocatedSpan};
15
16use super::ast::{Node::*, *};
17use super::tag::FUNCTION_TAGS;
18
19type Span<'a> = LocatedSpan<&'a str>;
20
21pub fn parse(input: &str) -> Result<Node, String> {
31 match parse_internal(Span::new(input), None) {
32 Ok((_, children)) => {
33 return Ok(Root(RootNode { children }));
34 }
35 Err(e) => {
36 return Err(format!("Parse error: {}", e));
37 }
38 };
39}
40
41fn take_until_tag(input: Span) -> IResult<Span, Span> {
42 let mut pos = 0usize;
43 let chars: Vec<char> = input.chars().collect();
44
45 while pos < chars.len() {
46 if chars[pos] == '<' {
47 if pos + 2 < chars.len() {
49 let next_char = chars[pos + 1];
50 let offset = if next_char == '$' || next_char == '/' { 1 } else { 0 };
51
52 if pos + offset + 2 < chars.len() {
53 let next_chars = &chars[pos + offset + 1..pos + offset + 3];
54 let next_str: String = next_chars.iter().collect();
55 if next_str.eq_ignore_ascii_case("mt") {
56 break;
57 }
58 }
59 }
60 }
61 pos += 1;
62 }
63
64 let byte_pos = input.chars().take(pos).map(|c| c.len_utf8()).sum();
66 Ok(input.take_split(byte_pos))
67}
68
69fn parse_internal<'a>(
70 mut input: Span<'a>,
71 current_tag: Option<String>,
72) -> IResult<Span<'a>, Vec<Node>> {
73 let mut children = vec![];
74
75 while input.len() > 0 {
76 let (_, pos) = position(input)?;
77 let (rest, text) = match opt(take_until_tag)(input)? {
78 (rest, Some(text)) => (rest, text),
79 _ => (Span::new(""), input),
80 };
81
82 if text.len() > 0 {
83 children.push(Text(TextNode {
84 value: text.to_string(),
85 line: pos.location_line(),
86 column: pos.get_utf8_column(),
87 offset: pos.location_offset(),
88 }))
89 }
90
91 if rest.len() == 0 {
92 break;
93 }
94
95 let (_, end_tag) = opt(tag_no_case("</"))(rest)?;
96 if end_tag.is_some() && current_tag.is_some() {
97 let current_tag_str = current_tag.unwrap();
98 let (rest, _) = alt((
99 tag_no_case(format!("</mt:{}>", current_tag_str).as_str()),
100 tag_no_case(format!("</mt{}>", current_tag_str).as_str()),
101 ))(rest)?;
102 input = rest;
103 break;
104 } else {
105 let (rest, node) = parse_tag(rest)?;
106 children.push(node);
107 input = rest;
108 };
109 }
110
111 return Ok((input, children));
112}
113
114fn parse_attribute_values(mut input: Span) -> IResult<Span, Vec<AttributeValue>> {
115 let mut values: Vec<AttributeValue> = vec![];
116
117 while input.len() > 0 {
118 let (_, pos) = position(input)?;
119 let (rest, ch) = opt(alt((char('"'), char('\''))))(input)?;
120 let (rest, value) = match ch {
121 Some(ch) => {
122 let (rest, value) = opt(alt((
123 recognize(tuple((char('<'), take_till(|c| c != '>'), char('>')))),
124 is_not(format!("{}\\", ch).as_str()),
125 )))(rest)?;
126 let (rest, _) = char(ch)(rest)?;
127 (rest, value)
128 }
129 None => opt(take_till(|c: char| c.is_whitespace()))(rest)?,
130 };
131 values.push(AttributeValue {
132 value: match value {
133 Some(value) => value.to_string(),
134 None => "".to_string(),
135 },
136 line: pos.location_line(),
137 column: pos.get_utf8_column(),
138 offset: pos.location_offset(),
139 });
140
141 input = rest;
142
143 let (rest, separator) = opt(char(','))(rest)?;
144 if separator.is_none() {
145 break;
146 }
147
148 input = rest;
149 }
150
151 Ok((input, values))
152}
153
154fn name_parser(input: Span) -> IResult<Span, Span> {
155 recognize(pair(
156 alt((alpha1, tag("_"))),
157 many0_count(alt((alphanumeric1, tag("_"), tag(":")))),
158 ))(input)
159}
160
161fn parse_attribute(input: Span) -> IResult<Span, Option<Attribute>> {
162 let (rest, _) = multispace0(input)?;
163 let (_, pos) = position(rest)?;
164
165 let (rest, name) = opt(name_parser)(rest)?;
166 let name = match name {
167 Some(name) => name,
168 None => return Ok((input, None)),
169 };
170
171 let (rest, _) = char('=')(rest)?;
172 let (rest, values) = parse_attribute_values(rest)?;
173
174 return Ok((
175 rest,
176 Some(Attribute {
177 name: name.to_string(),
178 values,
179 line: pos.location_line(),
180 column: pos.get_utf8_column(),
181 offset: pos.location_offset(),
182 }),
183 ));
184}
185
186fn parse_attributes(mut input: Span) -> IResult<Span, Vec<Attribute>> {
187 let mut attributes = vec![];
188
189 loop {
190 let (rest, attribute) = parse_attribute(input)?;
191 match attribute {
192 Some(attribute) => {
193 input = rest;
194 attributes.push(attribute)
195 }
196 None => break,
197 }
198 }
199
200 return Ok((input, attributes));
201}
202
203fn parse_tag(input: Span) -> IResult<Span, Node> {
204 let (_, pos) = position(input)?;
205 let (rest, head) = alt((tag_no_case("<mt"), tag_no_case("<$mt")))(input)?;
206 let (rest, _) = opt(char(':'))(rest)?;
207 let (rest, name) = name_parser(rest)?;
208 let (rest, attributes) = parse_attributes(rest)?;
209 let (rest, tail) = take_until(">")(rest)?;
210 let (rest, _) = anychar(rest)?;
211
212 if FUNCTION_TAGS.lock().unwrap().contains(&name.to_lowercase())
213 || &name.to_lowercase() == "else"
214 || &name.to_lowercase() == "elseif"
215 || (tail.len() >= 1
216 && (head.chars().nth(1).unwrap() == '$' || tail.chars().rev().nth(0).unwrap() == '/'))
217 {
218 return Ok((
219 rest,
220 FunctionTag(FunctionTagNode {
221 name: name.to_string(),
222 attributes,
223 line: pos.location_line(),
224 column: pos.get_utf8_column(),
225 offset: pos.location_offset(),
226 }),
227 ));
228 } else {
229 let (rest, children) = parse_internal(rest, Some(name.to_string()))?;
230 return Ok((
231 rest,
232 BlockTag(BlockTagNode {
233 name: name.to_string(),
234 children,
235 attributes,
236 line: pos.location_line(),
237 column: pos.get_utf8_column(),
238 offset: pos.location_offset(),
239 }),
240 ));
241 }
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an expected `AttributeValue` at the given source position.
    fn value_at(value: &str, line: u32, column: usize, offset: usize) -> AttributeValue {
        AttributeValue {
            value: value.to_string(),
            line,
            column,
            offset,
        }
    }

    /// Builds an expected `Attribute` at the given source position.
    fn attribute_at(
        name: &str,
        values: Vec<AttributeValue>,
        line: u32,
        column: usize,
        offset: usize,
    ) -> Attribute {
        Attribute {
            name: name.to_string(),
            values,
            line,
            column,
            offset,
        }
    }

    /// Builds an expected text node at the given source position.
    fn text_at(value: &str, line: u32, column: usize, offset: usize) -> Node {
        Text(TextNode {
            value: value.to_string(),
            line,
            column,
            offset,
        })
    }

    /// Builds an expected function tag node at the given source position.
    fn function_tag_at(
        name: &str,
        attributes: Vec<Attribute>,
        line: u32,
        column: usize,
        offset: usize,
    ) -> Node {
        FunctionTag(FunctionTagNode {
            name: name.to_string(),
            attributes,
            line,
            column,
            offset,
        })
    }

    /// Builds an expected block tag node at the given source position.
    fn block_tag_at(
        name: &str,
        attributes: Vec<Attribute>,
        children: Vec<Node>,
        line: u32,
        column: usize,
        offset: usize,
    ) -> Node {
        BlockTag(BlockTagNode {
            name: name.to_string(),
            children,
            attributes,
            line,
            column,
            offset,
        })
    }

    /// Runs `parse_tag` and checks that it consumed the whole source.
    fn parse_complete_tag(source: &str) -> Node {
        let (rest, node) = parse_tag(Span::new(source)).unwrap();
        assert_eq!(*rest.fragment(), "");
        node
    }

    /// Runs `parse_attribute` and checks that it consumed the whole source.
    fn parse_complete_attribute(source: &str) -> Attribute {
        let (rest, attribute) = parse_attribute(Span::new(source)).unwrap();
        assert_eq!(*rest.fragment(), "");
        attribute.unwrap()
    }

    #[test]
    fn test_parse_blank_attribute() {
        let tag = parse_complete_tag(
            r#"<$mt:Var name="search_link" strip="" trim="1" encode_html="1" setvar="search_link"$>"#,
        );
        let expected = function_tag_at(
            "Var",
            vec![
                attribute_at("name", vec![value_at("search_link", 1, 15, 14)], 1, 10, 9),
                attribute_at("strip", vec![value_at("", 1, 35, 34)], 1, 29, 28),
                attribute_at("trim", vec![value_at("1", 1, 43, 42)], 1, 38, 37),
                attribute_at("encode_html", vec![value_at("1", 1, 59, 58)], 1, 47, 46),
                attribute_at("setvar", vec![value_at("search_link", 1, 70, 69)], 1, 63, 62),
            ],
            1,
            1,
            0,
        );
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_if_else() {
        let tag = parse_complete_tag(
            r#"<mt:If name="blog_lang" eq="ja">ja_JP<mt:else><$mt:Var name="blog_lang"$></mt:If>"#,
        );
        let expected = block_tag_at(
            "If",
            vec![
                attribute_at("name", vec![value_at("blog_lang", 1, 13, 12)], 1, 8, 7),
                attribute_at("eq", vec![value_at("ja", 1, 28, 27)], 1, 25, 24),
            ],
            vec![
                text_at("ja_JP", 1, 33, 32),
                function_tag_at("else", vec![], 1, 38, 37),
                function_tag_at(
                    "Var",
                    vec![attribute_at(
                        "name",
                        vec![value_at("blog_lang", 1, 61, 60)],
                        1,
                        56,
                        55,
                    )],
                    1,
                    47,
                    46,
                ),
            ],
            1,
            1,
            0,
        );
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_tag_function_tag() {
        let tag = parse_complete_tag(r#"<mt:EntryTitle>"#);
        assert_eq!(tag, function_tag_at("EntryTitle", vec![], 1, 1, 0));
    }

    #[test]
    fn test_parse_multi_byte_character() {
        let tag = parse_complete_tag(r#"<mt:If name="foo">ほげ</mt:If>"#);
        let expected = block_tag_at(
            "If",
            vec![attribute_at("name", vec![value_at("foo", 1, 13, 12)], 1, 8, 7)],
            vec![text_at("ほげ", 1, 19, 18)],
            1,
            1,
            0,
        );
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_multi_byte_character_in_attribute() {
        let tag = parse_complete_tag(r#"<mt:Var name="日本語の変数名" value="こんにちは世界">"#);
        // Columns count characters while offsets count bytes, so they diverge
        // after the first multi-byte value.
        let expected = function_tag_at(
            "Var",
            vec![
                attribute_at("name", vec![value_at("日本語の変数名", 1, 14, 13)], 1, 9, 8),
                attribute_at("value", vec![value_at("こんにちは世界", 1, 30, 43)], 1, 24, 37),
            ],
            1,
            1,
            0,
        );
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_multi_byte_character_in_text() {
        let tag = parse_complete_tag(
            r#"<mt:If name="test">これは日本語のテキストです。Hello World!</mt:If>"#,
        );
        let expected = block_tag_at(
            "If",
            vec![attribute_at("name", vec![value_at("test", 1, 13, 12)], 1, 8, 7)],
            vec![text_at("これは日本語のテキストです。Hello World!", 1, 20, 19)],
            1,
            1,
            0,
        );
        assert_eq!(tag, expected);
    }

    #[test]
    fn test_parse_attribute() {
        let attribute = parse_complete_attribute(r#"limit="10""#);
        assert_eq!(attribute.name, "limit");
        assert_eq!(attribute.values, vec![value_at("10", 1, 7, 6)]);
    }

    #[test]
    fn test_parse_attribute_single_quote() {
        let attribute = parse_complete_attribute(r#"limit='10'"#);
        assert_eq!(attribute.name, "limit");
        assert_eq!(attribute.values, vec![value_at("10", 1, 7, 6)]);
    }

    #[test]
    fn test_parse_attribute_replace() {
        let attribute = parse_complete_attribute(r#"replace="a","b""#);
        assert_eq!(attribute.name, "replace");
        assert_eq!(
            attribute.values,
            vec![value_at("a", 1, 9, 8), value_at("b", 1, 13, 12)]
        );
    }
}