use thiserror::Error;
use serde::{Serialize, Deserialize};
use super::*;
use crate::util::*;
/// The specific syntax problems the tag scanner (`munch`) can report.
///
/// NOTE(review): the variant names look like they follow the parse error names of the WHATWG HTML tokenizer — confirm.
#[derive(Debug, Error, Clone, Copy)]
pub enum GAVSyntaxErrorKind {
/// The first character of the input wasn't `<`.
#[error("Input doesn't start with an HTML element.")]
InputDoesntStartWithHtmlElement,
/// A `?` came directly after the opening `<`.
#[error("Unexpected question mark instead of tag name.")]
UnexpectedQuestionMarkInsteadOfTagName,
/// The character directly after the opening `<` was neither an ASCII letter nor `?`.
#[error("Invalid start of tag name.")]
InvalidStartOfTagName,
/// A NUL character appeared in the tag name or in an attribute value.
#[error("Unexpected null character.")]
UnexpectedNullCharacter,
/// A `/` was followed by something other than `>`.
#[error("Unexpected solidus in tag.")]
UnexpectedSolidusInTag,
/// An `=` appeared where an attribute name was expected to start.
#[error("Unexpected equals sign before attribute name.")]
UnexpectedEqualsSignBeforeAttributeName,
/// A NUL, `"`, `'`, or `<` appeared inside an attribute name.
#[error("Unexpected character in attribute name.")]
UnexpectedCharacterInAttributeName,
/// The tag was closed with `>` after an `=` without any value in between.
#[error("Missing attribute value.")]
MissingAttributeValue,
/// A quoted attribute value was followed by something other than whitespace, `/`, or `>`.
#[error("Missing whitespace between attributes.")]
MissingWhitespaceBetweenAttributes
}
/// The errors `get_attribute_value` can return.
#[derive(Debug, Error)]
pub enum GAVError {
/// The scanner rejected a character of the input.
#[error("Syntax error: {index}, {last_bite:?}, {kind:?}")]
Syntax {
/// The position in the input of the character that triggered the error.
index: usize,
/// The scanner state at the time the error was returned.
last_bite: GAVLastBite,
/// What exactly was wrong.
kind: GAVSyntaxErrorKind
},
/// Unescaping the raw attribute value failed; the inner error is forwarded unchanged.
#[error(transparent)]
UnescapeTextError(#[from] UnescapeTextError),
/// The input ended before the tag's closing `>` was reached.
#[error("The HTML tag wasn't finished.")]
UnfinishedTag
}
/// The state of the tag scanner, i.e. what the most recently consumed character put it into.
///
/// NOTE(review): the names look like they mirror the states of the WHATWG HTML tokenizer — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub enum GAVLastBite {
/// Initial state; nothing consumed yet, a `<` is expected.
Data,
/// The opening `<` was consumed; the first letter of the tag name is expected.
TagOpen,
/// Inside the tag name.
TagName,
/// A `/` was consumed; only `>` may follow.
SelfClosingStartTag,
/// Between attributes, waiting for the next attribute name (or the end of the tag).
BeforeAttributeName,
/// Inside an attribute name.
AttributeName,
/// An attribute name has ended; an `=`, another attribute, `/`, or `>` may follow.
AfterAttributeName,
/// An `=` was consumed; the attribute's value is expected.
BeforeAttributeValue,
/// Inside a `"`-quoted attribute value.
AttributeValueDoubleQuoted,
/// Inside a `'`-quoted attribute value.
AttributeValueSingleQuoted,
/// Inside an unquoted attribute value.
AttributeValueUnquoted,
/// The closing quote of a quoted attribute value was consumed.
AfterAttributeValueQuoted,
/// The tag's closing `>` was consumed; scanning is finished.
Done
}
/// The working state of one `get_attribute_value` scan, mutated by `munch` one character at a time.
#[derive(Debug)]
struct GAVState<'a> {
/// The text being scanned. Names and values are taken from it by slicing.
input: &'a str,
/// The attribute name being searched for; compared with `==` against slices of `input`.
name: &'a str,
/// The current scanner state.
last_bite: GAVLastBite,
/// The result so far: `None` if the attribute hasn't been found, `Some(None)` if it was found
/// without a value, `Some(Some(v))` if it was found with the raw (not yet unescaped) value `v`.
ret: Option<Option<&'a str>>,
/// Start of the most recently seen attribute name. Used as a slice bound on `input`,
/// so it has to be a byte offset on a character boundary.
attr_name_start: usize,
/// End (exclusive) of the most recently seen attribute name; same requirements as `attr_name_start`.
attr_name_end: usize,
/// Start of the attribute value currently being scanned; same requirements as `attr_name_start`.
attr_value_start: usize
}
/// Shorthand for [`GAVLastBite`], used to keep the match arms in `munch` short.
type LB = GAVLastBite;
/// Shorthand for [`GAVSyntaxErrorKind`], used to keep the match arms in `munch` short.
type EK = GAVSyntaxErrorKind;
/// Scans the HTML start tag at the beginning of `input` and returns the value of its attribute `name`.
///
/// `input` has to start with `<`. Scanning stops as soon as the tag's closing `>` has been consumed;
/// whatever follows it is never looked at. Attribute names are compared to `name` with `==`, so the
/// match is exact (including case).
///
/// # Returns
/// - `Ok(None)` if the tag has no attribute called `name`.
/// - `Ok(Some(None))` if the attribute is present but has no value.
/// - `Ok(Some(Some(value)))` if the attribute has a value; `value` is the result of passing the raw
///   text of the value through `unescape_text`.
///
/// # Errors
/// - [`GAVError::Syntax`] if the scanner rejects a character. `index` is the byte offset of that
///   character in `input` (identical to its character index for ASCII input).
/// - [`GAVError::UnescapeTextError`] if `unescape_text` fails on the raw value.
/// - [`GAVError::UnfinishedTag`] if `input` ends before the closing `>`.
pub fn get_attribute_value<'a>(input: &'a str, name: &'a str) -> Result<Option<Option<String>>, GAVError> {
    debug!(parse::html::get_attribute_value, &(), input, name);
    let mut state = GAVState {
        input,
        name,
        last_bite: LB::Data,
        ret: None,
        attr_name_start: 0,
        attr_name_end: 0,
        attr_value_start: 0
    };
    // `munch` stores `i` and later uses it to slice `input`, so it must be a byte offset.
    // `chars().enumerate()` would yield character counts instead, which slice the wrong range
    // (or panic on a non-boundary) as soon as the input contains a multi-byte character.
    for (i, c) in input.char_indices() {
        if let Err(kind) = munch(&mut state, i, c) {
            return Err(GAVError::Syntax {
                index: i,
                last_bite: state.last_bite,
                kind
            });
        }
        if matches!(state.last_bite, LB::Done) {
            return Ok(match state.ret {
                // Only the raw slice is recorded during scanning; character references are resolved here.
                Some(Some(value)) => Some(Some(unescape_text(value)?)),
                Some(None) => Some(None),
                None => None
            });
        }
    }
    // Ran out of input without ever seeing the closing `>`.
    Err(GAVError::UnfinishedTag)
}
/// Advances the tag scanner in `state` by the single character `c`.
///
/// `i` is the position of `c` in `state.input`. It is stored and later used as a slice bound on
/// `state.input`, so it has to be the byte offset of `c`.
///
/// Some arms only switch state and then call `munch` again with the same `i` and `c`, so that the
/// character is handled by the rules of the new state.
///
/// Whenever an attribute whose name equals `state.name` is completed, `state.ret` is updated:
/// `Some(None)` for an attribute without a value, `Some(Some(raw))` for one with a value, where `raw`
/// is the unmodified slice of the input (quotes excluded, character references not resolved).
/// Attributes with other names never touch `state.ret`. If the searched attribute occurs more than
/// once, the last occurrence overwrites earlier ones.
///
/// # Errors
/// Returns the [`GAVSyntaxErrorKind`] describing why `c` isn't acceptable in the current state.
///
/// # Panics
/// Panics if called when `state.last_bite` is already [`GAVLastBite::Done`].
fn munch(state: &mut GAVState, i: usize, c: char) -> Result<(), GAVSyntaxErrorKind> {
    /// Whether the attribute name delimited by `attr_name_start..attr_name_end` is the searched one.
    fn is_target(state: &GAVState<'_>) -> bool {
        &state.input[state.attr_name_start..state.attr_name_end] == state.name
    }

    debug!(parse::html::get_attribute::munch, &(), state, i, c);
    match (state.last_bite, c) {
        (LB::Data, '<') => {state.last_bite = LB::TagOpen;},
        (LB::Data, _  ) => return Err(EK::InputDoesntStartWithHtmlElement),

        (LB::TagOpen, 'a'..='z' | 'A'..='Z') => {state.last_bite = LB::TagName;},
        (LB::TagOpen, '?'                  ) => return Err(EK::UnexpectedQuestionMarkInsteadOfTagName),
        (LB::TagOpen, _                    ) => return Err(EK::InvalidStartOfTagName),

        (LB::TagName, '\t' | '\r' | '\n' | ' ') => {state.last_bite = LB::BeforeAttributeName;},
        (LB::TagName, '/'                     ) => {state.last_bite = LB::SelfClosingStartTag;},
        (LB::TagName, '\0'                    ) => return Err(EK::UnexpectedNullCharacter),
        (LB::TagName, '>'                     ) => {state.last_bite = LB::Done;},
        (LB::TagName, _                       ) => {},

        (LB::SelfClosingStartTag, '>') => {state.last_bite = LB::Done;},
        (LB::SelfClosingStartTag, _  ) => return Err(EK::UnexpectedSolidusInTag),

        (LB::BeforeAttributeName, '\t' | '\r' | '\n' | ' ') => {},
        // No attribute name is pending here; let `AfterAttributeName` deal with the `/` or `>`.
        (LB::BeforeAttributeName, '/' | '>') => {
            state.last_bite = LB::AfterAttributeName;
            munch(state, i, c)?;
        },
        (LB::BeforeAttributeName, '=') => return Err(EK::UnexpectedEqualsSignBeforeAttributeName),
        // `c` is the first character of a new attribute name.
        (LB::BeforeAttributeName, _) => {
            state.last_bite = LB::AttributeName;
            state.attr_name_start = i;
            munch(state, i, c)?;
        },

        // The name ended without an `=` directly after it.
        (LB::AttributeName, '\t' | '\r' | '\n' | ' ' | '/' | '>') => {
            state.last_bite = LB::AfterAttributeName;
            state.attr_name_end = i;
            // Only record a hit. A non-matching attribute must not erase a match found earlier in the tag.
            if is_target(state) {state.ret = Some(None);}
            munch(state, i, c)?;
        },
        (LB::AttributeName, '=') => {
            state.last_bite = LB::BeforeAttributeValue;
            state.attr_name_end = i;
        },
        (LB::AttributeName, '\0' | '"' | '\'' | '<') => return Err(EK::UnexpectedCharacterInAttributeName),
        (LB::AttributeName, _                      ) => {},

        (LB::AfterAttributeName, '\t' | '\r' | '\n' | ' ') => {},
        (LB::AfterAttributeName, '/'                     ) => {state.last_bite = LB::SelfClosingStartTag;},
        (LB::AfterAttributeName, '='                     ) => {state.last_bite = LB::BeforeAttributeValue;},
        // Any pending valueless attribute was already recorded when its name ended. Comparing a slice
        // ending at `i` here would include trailing whitespace, or text that isn't an attribute name at all.
        (LB::AfterAttributeName, '>') => {state.last_bite = LB::Done;},
        // `c` is the first character of the next attribute name.
        (LB::AfterAttributeName, _) => {
            state.last_bite = LB::AttributeName;
            state.attr_name_start = i;
            munch(state, i, c)?;
        },

        (LB::BeforeAttributeValue, '\t' | '\r' | '\n' | ' ') => {},
        // The value starts after the one-byte opening quote.
        #[allow(clippy::arithmetic_side_effects, reason = "Can't happen.")]
        (LB::BeforeAttributeValue, '"') => {
            state.last_bite = LB::AttributeValueDoubleQuoted;
            state.attr_value_start = i+1;
        },
        #[allow(clippy::arithmetic_side_effects, reason = "Can't happen.")]
        (LB::BeforeAttributeValue, '\'') => {
            state.last_bite = LB::AttributeValueSingleQuoted;
            state.attr_value_start = i+1;
        },
        (LB::BeforeAttributeValue, '>') => return Err(EK::MissingAttributeValue),
        // `c` is the first character of an unquoted value.
        (LB::BeforeAttributeValue, _) => {
            state.last_bite = LB::AttributeValueUnquoted;
            state.attr_value_start = i;
            munch(state, i, c)?;
        },

        (LB::AttributeValueDoubleQuoted, '"') => {
            state.last_bite = LB::AfterAttributeValueQuoted;
            if is_target(state) {state.ret = Some(Some(&state.input[state.attr_value_start..i]));}
        },
        // Character references are not interpreted here; the value is kept raw.
        (LB::AttributeValueDoubleQuoted, '&' ) => {},
        (LB::AttributeValueDoubleQuoted, '\0') => return Err(EK::UnexpectedNullCharacter),
        (LB::AttributeValueDoubleQuoted, _   ) => {},

        (LB::AttributeValueSingleQuoted, '\'') => {
            state.last_bite = LB::AfterAttributeValueQuoted;
            if is_target(state) {state.ret = Some(Some(&state.input[state.attr_value_start..i]));}
        },
        (LB::AttributeValueSingleQuoted, '&' ) => {},
        (LB::AttributeValueSingleQuoted, '\0') => return Err(EK::UnexpectedNullCharacter),
        (LB::AttributeValueSingleQuoted, _   ) => {},

        (LB::AttributeValueUnquoted, '\t' | '\r' | '\n' | ' ') => {
            state.last_bite = LB::BeforeAttributeName;
            if is_target(state) {state.ret = Some(Some(&state.input[state.attr_value_start..i]));}
        },
        (LB::AttributeValueUnquoted, '&') => {},
        (LB::AttributeValueUnquoted, '>') => {
            state.last_bite = LB::Done;
            if is_target(state) {state.ret = Some(Some(&state.input[state.attr_value_start..i]));}
        },
        (LB::AttributeValueUnquoted, '\0') => return Err(EK::UnexpectedNullCharacter),
        (LB::AttributeValueUnquoted, _   ) => {},

        (LB::AfterAttributeValueQuoted, '\t' | '\r' | '\n' | ' ') => {state.last_bite = LB::BeforeAttributeName;},
        (LB::AfterAttributeValueQuoted, '/'                     ) => {state.last_bite = LB::SelfClosingStartTag;},
        (LB::AfterAttributeValueQuoted, '>'                     ) => {state.last_bite = LB::Done;},
        (LB::AfterAttributeValueQuoted, _                       ) => return Err(EK::MissingWhitespaceBetweenAttributes),

        // The caller stops feeding characters once `Done` is reached.
        (LB::Done, _) => panic!("Logic error.")
    }
    Ok(())
}