pub enum TokenKind {
PIOpen,
PIClose,
Comment,
CData,
DocTypeOpen,
DocTypeValue,
DocTypeSubsetOpen,
DocTypeSubsetClose,
DocTypeClose,
TagOpen,
EndTagOpen,
TagClose,
TagSelfClose,
AttrName,
AttrValue,
Text,
Error,
}
TagSoup Token kind.
Variants§
PIOpen
The start of a processing instruction, e.g. <?xml. The token's span covers only the target name after <? (here, xml).
let input = "<?xml ?>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[0].kind, tagsoup::TokenKind::PIOpen);
assert_eq!(&input[tokens[0].span.range()], "xml");
PIClose
The end of a processing instruction, i.e. ?>.
let input = "<?xml ?>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[1].kind, tagsoup::TokenKind::PIClose);
assert_eq!(&input[tokens[1].span.range()], "?>");
Comment
An HTML comment token, including the full <!-- ... --> text.
let input = "<!-- comment -->";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[0].kind, tagsoup::TokenKind::Comment);
assert_eq!(&input[tokens[0].span.range()], "<!-- comment -->");
CData
A CDATA section token, including the full <![CDATA[ ... ]]> text.
let input = "<![CDATA[x < y]]>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[0].kind, tagsoup::TokenKind::CData);
assert_eq!(&input[tokens[0].span.range()], "<![CDATA[x < y]]>");
DocTypeOpen
The identifier after <!, such as DOCTYPE.
let input = "<!DOCTYPE html>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[0].kind, tagsoup::TokenKind::DocTypeOpen);
assert_eq!(&input[tokens[0].span.range()], "DOCTYPE");
DocTypeValue
A value inside a doctype declaration.
let input = "<!DOCTYPE html>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[1].kind, tagsoup::TokenKind::DocTypeValue);
assert_eq!(&input[tokens[1].span.range()], "html");
DocTypeSubsetOpen
The [ character starting the DTD subset in a doctype declaration.
let input = "<!DOCTYPE html [ ... ]>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[2].kind, tagsoup::TokenKind::DocTypeSubsetOpen);
assert_eq!(&input[tokens[2].span.range()], "[");
DocTypeSubsetClose
The ] character ending the DTD subset in a doctype declaration.
let input = "<!DOCTYPE html [ ... ]>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[4].kind, tagsoup::TokenKind::DocTypeSubsetClose);
assert_eq!(&input[tokens[4].span.range()], "]");
DocTypeClose
The closing > of a doctype declaration.
let input = "<!DOCTYPE html>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[2].kind, tagsoup::TokenKind::DocTypeClose);
assert_eq!(&input[tokens[2].span.range()], ">");
TagOpen
The tag name after <, such as div.
let input = "<div>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[0].kind, tagsoup::TokenKind::TagOpen);
assert_eq!(&input[tokens[0].span.range()], "div");
EndTagOpen
The tag name after </, such as div.
let input = "</div>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[0].kind, tagsoup::TokenKind::EndTagOpen);
assert_eq!(&input[tokens[0].span.range()], "div");
TagClose
The closing > for a start tag or end tag.
let input = "<div>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[1].kind, tagsoup::TokenKind::TagClose);
assert_eq!(&input[tokens[1].span.range()], ">");
TagSelfClose
The closing /> for a self-closing tag.
let input = "<br/>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[1].kind, tagsoup::TokenKind::TagSelfClose);
assert_eq!(&input[tokens[1].span.range()], "/>");
AttrName
An attribute name inside a tag or processing instruction.
let input = "<div class=hero>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[1].kind, tagsoup::TokenKind::AttrName);
assert_eq!(&input[tokens[1].span.range()], "class");
AttrValue
An attribute value without the leading =.
let input = "<div class=hero>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[2].kind, tagsoup::TokenKind::AttrValue);
assert_eq!(&input[tokens[2].span.range()], "hero");
Text
Text content outside of tags.
let input = "hello";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[0].kind, tagsoup::TokenKind::Text);
assert_eq!(&input[tokens[0].span.range()], "hello");
Error
An error token for unrecognized or malformed input.
Returned for malformed attributes, skipping until the next > character.
let input = "<div \0bad>";
let tokens: Vec<_> = tagsoup::Lexer::new(input.as_bytes()).collect();
assert_eq!(tokens[1].kind, tagsoup::TokenKind::Error);
assert_eq!(&input[tokens[1].span.range()], "\0bad");