1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
//! Types for HTML attributes.
use std::collections::{btree_map, BTreeMap};
use std::iter::FromIterator;
use std::ops::{Index, Range};
use crate::offset::Offset;
/// A map of HTML attributes, keyed by attribute name.
///
/// Does not preserve the order of attributes.
/// Iterating always yields attributes in order by name.
///
/// The type parameter `O` is the type of the source offsets that are stored
/// alongside every attribute.
///
/// # Example
///
/// ```
/// # use html5tokenizer::attr::AttributeMap;
/// let attrs: AttributeMap<()> = vec![("href".into(), "http://example.com".into())]
///     .into_iter()
///     .collect();
/// assert_eq!(&attrs["href"], "http://example.com");
/// ```
#[derive(Debug, Default, PartialEq, Eq)]
pub struct AttributeMap<O> {
/// Maps each attribute name to its value, offsets and value syntax.
/// The ordered map is what makes iteration sorted by name.
pub(crate) inner: BTreeMap<String, AttrInternal<O>>,
}
/// The value type internally used by the [`AttributeMap`].
/// Not part of the public API.
///
/// The attribute name is not stored here; it is the key of the map entry.
#[derive(Debug, Eq, PartialEq)]
pub(crate) struct AttrInternal<O> {
/// The attribute value.
pub value: String,
/// The start offset of the attribute name.
pub name_offset: O,
/// The start offset of the attribute value.
/// For the empty attribute syntax this is just `O::default()`.
/// We intentionally don't use `Option<O>` here to spare us a byte per attribute.
pub value_offset: O,
/// The syntax of the attribute value.
/// `None` indicates the empty attribute syntax, in which case `value_offset`
/// is only a placeholder and must not be reported as a real offset.
pub value_syntax: Option<AttrValueSyntax>,
}
/// The syntax of the attribute value.
///
/// Only explicitly written values have a syntax. The empty attribute syntax
/// (e.g. `disabled` in `<input disabled>`) has no variant of its own and is
/// expressed as `Option::<AttrValueSyntax>::None` wherever this type is used.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum AttrValueSyntax {
/// An unquoted attribute value, e.g. `id=foo`.
Unquoted,
/// A single-quoted attribute value, e.g. `id='foo'`.
SingleQuoted,
/// A double-quoted attribute value, e.g. `id="foo"`.
DoubleQuoted,
}
/// An HTML attribute borrowed from an [`AttributeMap`].
///
/// This is a lightweight view made of two references into the map; the
/// lifetime `'a` is the lifetime of the borrow of the map.
#[derive(Debug, Eq, PartialEq)]
pub struct Attribute<'a, O> {
/// The attribute name, i.e. the key of the map entry.
name: &'a str,
/// The value, offsets and value syntax stored for that name.
map_val: &'a AttrInternal<O>,
}
/// An owned HTML attribute.
///
/// This is the item type produced when an [`AttributeMap`] is iterated by value.
#[derive(Debug, PartialEq, Eq)]
pub struct AttributeOwned<O> {
/// The attribute name.
pub name: String,
/// The attribute value.
pub value: String,
/// The start offset of the attribute name.
pub name_offset: O,
/// The start offset of the attribute value.
/// `None` in case of the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
pub value_offset: Option<O>,
/// The syntax of the attribute value.
/// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
pub value_syntax: Option<AttrValueSyntax>,
}
impl<O> AttributeMap<O> {
    /// Returns the attribute with the given name.
    ///
    /// Returns `None` if the map contains no attribute called `name`.
    /// The returned [`Attribute`] borrows both its name and its value from this map.
    pub fn get(&self, name: &str) -> Option<Attribute<'_, O>> {
        // `get_key_value` hands back the map's own key, so the attribute name is
        // tied to the borrow of the map and not to the `name` argument.
        let (name, map_val) = self.inner.get_key_value(name)?;
        Some(Attribute { name, map_val })
    }
}
// Accessors that never touch the offsets, so they place no bound on `O`.
impl<'a, O> Attribute<'a, O> {
    /// Returns the attribute name.
    pub fn name(&self) -> &'a str {
        self.name
    }

    /// Returns the attribute value.
    pub fn value(&self) -> &'a str {
        &self.map_val.value
    }

    /// Returns the attribute value syntax in case the value is explicitly defined.
    ///
    /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
    pub fn value_syntax(&self) -> Option<AttrValueSyntax> {
        self.map_val.value_syntax
    }
}

// Span calculations do arithmetic on the offsets and therefore need `O: Offset`.
impl<'a, O: Offset> Attribute<'a, O> {
    /// Calculates the span of the attribute name and returns it.
    ///
    /// The span starts at the stored name offset and ends at that offset
    /// advanced by the length of the name in bytes.
    pub fn name_span(&self) -> Range<O> {
        // Shares the calculation with the crate-internal helper on the map value.
        self.map_val.name_span(self.name.len())
    }

    /// For explicitly defined values calculates the span of the attribute value and returns it.
    ///
    /// The span starts at the stored value offset and ends at that offset
    /// advanced by the length of the value in bytes.
    ///
    /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
    pub fn value_span(&self) -> Option<Range<O>> {
        // Without a value syntax the stored value offset is only a placeholder,
        // so no span may be derived from it.
        self.map_val.value_syntax.map(|_| {
            let start = self.map_val.value_offset;
            start..start + self.map_val.value.len()
        })
    }
}
// `Index::index` must return a reference to `Output`, so `Output` cannot be
// `Attribute`: that would mean returning a reference to a temporary value.
// Indexing therefore yields only the attribute value.
impl<O> Index<&str> for AttributeMap<O> {
    type Output = str;

    /// Returns the value of the attribute with the given name.
    ///
    /// # Panics
    ///
    /// Panics if the map contains no attribute called `name`.
    fn index(&self, name: &str) -> &Self::Output {
        let attr = &self.inner[name];
        attr.value.as_str()
    }
}
/// Consuming iteration: yields every attribute as an [`AttributeOwned`].
impl<O> IntoIterator for AttributeMap<O> {
    type Item = AttributeOwned<O>;
    type IntoIter = AttrIntoIter<O>;

    fn into_iter(self) -> Self::IntoIter {
        // Take the underlying map out of the wrapper and wrap its by-value iterator.
        let Self { inner } = self;
        AttrIntoIter(inner.into_iter())
    }
}
/// A consuming iterator over the attributes of an [`AttributeMap`].
pub struct AttrIntoIter<O>(btree_map::IntoIter<String, AttrInternal<O>>);
impl<O> Iterator for AttrIntoIter<O> {
type Item = AttributeOwned<O>;
fn next(&mut self) -> Option<Self::Item> {
let (name, map_val) = self.0.next()?;
Some(AttributeOwned {
name,
value: map_val.value,
name_offset: map_val.name_offset,
value_offset: map_val
.value_syntax
.is_some()
.then_some(map_val.value_offset),
value_syntax: map_val.value_syntax,
})
}
}
/// Borrowing iteration: yields every attribute as an [`Attribute`] view into the map.
impl<'a, O> IntoIterator for &'a AttributeMap<O> {
    type Item = Attribute<'a, O>;
    type IntoIter = AttrIter<'a, O>;

    fn into_iter(self) -> Self::IntoIter {
        let entries = self.inner.iter();
        AttrIter(entries)
    }
}
/// A borrowed iterator over the attributes of an [`AttributeMap`].
///
/// Yields the attributes as [`Attribute`] views in order by name.
pub struct AttrIter<'a, O>(btree_map::Iter<'a, String, AttrInternal<O>>);

impl<'a, O> Iterator for AttrIter<'a, O> {
    type Item = Attribute<'a, O>;

    fn next(&mut self) -> Option<Self::Item> {
        self.0
            .next()
            .map(|(name, map_val)| Attribute { name, map_val })
    }

    /// Forwards the exact bounds of the underlying map iterator; every map
    /// entry produces exactly one item.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {
fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self {
Self {
inner: iter
.into_iter()
.map(|(name, value)| {
(
name,
AttrInternal {
value,
name_offset: O::default(),
value_offset: O::default(),
value_syntax: Some(AttrValueSyntax::DoubleQuoted),
},
)
})
.collect(),
}
}
}
impl<O: Offset> AttrInternal<O> {
    /// Calculates the span of the attribute name.
    ///
    /// The name is not stored in this type, so its length has to be passed in
    /// as `name_len`. The span starts at the stored name offset and ends at
    /// that offset advanced by `name_len`.
    pub(crate) fn name_span(&self, name_len: usize) -> Range<O> {
        let start = self.name_offset;
        let end = start + name_len;
        start..end
    }
}