tag_parser/
lib.rs

1
2use parcelona::parser_combinators::{*};
3use parcelona::u8::{*};
4
/// One node of parsed content: either a run of text or a tag element.
///
/// `I` is the slice-like type the parser hands back for names, values and
/// text (the parsers in this file use `&[u8]`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Item<I> {
	/// A run of text found between tags.
	IText(I),
	/// A tag element, possibly carrying attributes and nested items.
	ITag(Tag<I>),
}

/// A parsed tag element.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tag<I> {
	/// Tag name as written in the opening tag.
	pub name: I,
	/// Attribute `(name, value)` pairs of the opening tag, if any were parsed.
	pub attributes: Option<Vec<(I,I)>>,
	/// Nested content; `None` for a self-closed tag (`<name ... />`).
	pub items: Option<Vec<Item<I>>>,
}
17
// Diagnostic messages attached to the individual parsers via `msg_err`.
const OPEN_TAG_NOTFOUND:  &str = r#"open tag '<' not found"#;
const CLOSE_TAG_NOTFOUND: &str = r#"close tag '>' not found"#;
const SEP_NOTFOUND:       &str = r#"'=' not found"#;
const END_TAG_NOTFOUND:   &str = r#"end tag '</ >' not found"#;
const QUOTE_NOTFOUND:     &str = r#"quote " not found"#;
const NAME_ERR:           &str = r#"name parse error"#;
const VALUE_ERR:          &str = r#"value parse error"#;
const ATTR_ERR:           &str = r#"attribute parse error"#;
const TEXT_ERR1:          &str = r#"text parse error"#;
const HEAD_ERR:           &str = r#"head parse error"#;
const CONTENT_ERR:        &str = r#"inner content parse error"#;
29
30static NAME: StaticClassOfSymbols<u8> = StaticClassOfSymbols::new()
31	.range_enable_set(ALPHA_NUM)
32	.one_enable_set(&[45,46,95]); // - . _
33
34static VALUE: StaticClassOfSymbols<u8> = StaticClassOfSymbols::new()
35	.one_disable_set(&[34]) // "
36	.default_enable_one(true);
37
38static TEXT: StaticClassOfSymbols<u8> = StaticClassOfSymbols::new()
39	.one_disable_set(br#"<>\"#)  // <>\
40	.parts_enable_set(&[br#"\\"#, br#"\<"#, br#"\>"#])
41	.default_enable_one(true);   
42
43fn parse_tag(input: &[u8]) -> ParseResult<u8,Item<&[u8]>> {
44	let space  = seq(is_space);
45	let open   = between_opt(space, starts_with(b"<"), space).msg_err(OPEN_TAG_NOTFOUND);
46	let close  = between_opt(space, starts_with(b">"), space).msg_err(CLOSE_TAG_NOTFOUND);
47	let sep    = starts_with(b"=").msg_err(SEP_NOTFOUND);
48	let quotes = between_opt(space, starts_with(b"\""), space).msg_err(QUOTE_NOTFOUND);
49	let name_parser  = between_opt(space, &NAME, space).msg_err(NAME_ERR);
50	let value_parser = fmap(between(quotes, &VALUE, quotes).msg_err(VALUE_ERR),<[u8]>::trim_ascii);
51	let text  = fmap(TEXT.msg_err(TEXT_ERR1),|x|{Item::<&[u8]>::IText(<[u8]>::trim_ascii(x))});
52	let attrs = sep_pair(name_parser, sep, value_parser).msg_err(ATTR_ERR).more().option();
53	let close_slash = between_opt(space, any(b"/"), space);
54
55    // firs line tag
56	let (input, (tag_name, tag_attrs)) = right(open, pair(name_parser, attrs))
57		.msg_err(HEAD_ERR).strerr().parse(input)?;
58    // /> self closed tag
59	let (input, cl_slash) = left(close_slash.option(),close).strerr().parse(input)?;
60	if cl_slash.is_none() {
61    	// inner content
62		let (input, it) = (text,parse_tag).alt().msg_err(CONTENT_ERR).more().strerr().parse(input)?;
63    	// close line tag
64		let (input, _) = between(open, pair(close_slash, starts_with(tag_name)), close)
65			.msg_err(END_TAG_NOTFOUND).strerr().parse(input)?;
66
67		return Ok((input, Item::<&[u8]>::ITag(Tag {
68			name: tag_name,
69			attributes: tag_attrs,
70			items: Some(it),
71		})));
72	}
73		Ok((input, Item::<&[u8]>::ITag(Tag {
74			name: tag_name,
75			attributes: tag_attrs,
76			items: None,
77		})))	
78}
79
80pub fn parse<'a>(input: &'a[u8]) -> ParseResult<'a,u8,Vec<Item::<&'a[u8]>>> {
81	let text  = fmap(TEXT.msg_err(TEXT_ERR1),|x|{Item::<&[u8]>::IText(<[u8]>::trim_ascii(x))});
82	(text,parse_tag).alt().msg_err(CONTENT_ERR).more().strerr().parse(input)
83}