dtd_parser/
attlist.rs

1use nom::{
2    branch::alt,
3    bytes::complete::{is_not, tag},
4    character::complete::{char, multispace0, multispace1, space1},
5    combinator::{map, opt},
6    multi::{many0, separated_list1},
7    sequence::{delimited, pair, preceded, terminated, tuple},
8};
9use nom_tracable::tracable_parser;
10
11use super::{name, nmtoken, reference, Name, Nmtoken, Reference, Result, Span};
12
13/// 属性可提供有关元素的额外信息。
14///
15/// 属性总是被置于某元素的开始标签中。属性总是以名称/值的形式成对出现的。
16#[derive(Clone, Debug, Display)]
17#[display(
18    fmt = "<!ATTLIST {} {}>",
19    name,
20    "attdefs.iter().map(|v|v.to_string()).collect::<Vec<_>>().join(\" \")"
21)]
22pub struct AttlistDecl {
23    pub name: Name,
24    pub attdefs: Vec<AttDef>,
25}
26
27/// AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
28#[tracable_parser]
29pub(super) fn attlist_decl(i: Span) -> Result<AttlistDecl> {
30    map(
31        tuple((
32            preceded(tuple((tag("<!ATTLIST"), multispace1)), name),
33            terminated(many0(attdef), tuple((multispace0, tag(">")))),
34        )),
35        |(name, attdefs)| AttlistDecl {
36            name,
37            attdefs: attdefs,
38        },
39    )(i)
40}
41
42#[derive(Clone, Debug, Display)]
43#[display(fmt = "{} {} {}", name, atttype, default_decl)]
44pub struct AttDef {
45    pub name: Name,
46    pub atttype: AttType,
47    pub default_decl: DefaultDecl,
48}
49
50/// AttDef ::= S Name S AttType S DefaultDecl
51#[tracable_parser]
52fn attdef(i: Span) -> Result<AttDef> {
53    map(
54        tuple((
55            preceded(multispace0, name),
56            preceded(multispace0, atttype),
57            preceded(multispace0, default_decl),
58        )),
59        |(name, atttype, default_decl)| AttDef {
60            name,
61            atttype,
62            default_decl,
63        },
64    )(i)
65}
66
67#[derive(Clone, Debug, Display)]
68pub enum AttType {
69    /// StringType         ::=     'CDATA'
70    #[display(fmt = "CDATA")]
71    StringType,
72    #[display(fmt = "{}", "_0")]
73    TokenizedType(TokenizedType),
74    #[display(fmt = "{}", "_0")]
75    EnumeratedType(EnumeratedType),
76}
77
78#[derive(Clone, Debug, Display)]
79pub enum TokenizedType {
80    ID,
81    IDREF,
82    IDREFS,
83    ENTITY,
84    ENTITIES,
85    NMTOKEN,
86    NMTOKENS,
87}
88
89///      StringType         ::=     'CDATA'
90#[tracable_parser]
91fn string_type(i: Span) -> Result<AttType> {
92    map(tag("CDATA"), |_| AttType::StringType)(i)
93}
94
95///      TokenizedType      ::=     'ID'            [VC: ID]
96///                                                 [VC: One ID per Element Type]
97///                                                 [VC: ID Attribute Default]
98///                                 | 'IDREF'       [VC: IDREF]
99///                                 | 'IDREFS'      [VC: IDREF]
100///                                 | 'ENTITY'      [VC: Entity Name]           
101///                                 | 'ENTITIES'    [VC: Entity Name]
102///                                 | 'NMTOKEN'     [VC: Name Token]
103///                                 | 'NMTOKENS'    [VC: Name Token]]
104#[tracable_parser]
105fn tokenized_type(i: Span) -> Result<AttType> {
106    map(
107        alt((
108            terminated(tag("ID"), space1),
109            terminated(tag("IDREF"), space1),
110            terminated(tag("IDREFS"), space1),
111            terminated(tag("ENTITY"), space1),
112            terminated(tag("ENTITIES"), space1),
113            terminated(tag("NMTOKEN"), space1),
114            terminated(tag("NMTOKENS"), space1),
115        )),
116        |ty: Span| match *ty {
117            "ID" => AttType::TokenizedType(TokenizedType::ID),
118            "IDREF" => AttType::TokenizedType(TokenizedType::IDREF),
119            "IDREFS" => AttType::TokenizedType(TokenizedType::IDREFS),
120            "ENTITY" => AttType::TokenizedType(TokenizedType::ENTITY),
121            "ENTITIES" => AttType::TokenizedType(TokenizedType::ENTITIES),
122            "NMTOKEN" => AttType::TokenizedType(TokenizedType::NMTOKEN),
123            "NMTOKENS" => AttType::TokenizedType(TokenizedType::NMTOKENS),
124            _ => unreachable!(),
125        },
126    )(i)
127}
128
129/// AttType            ::=     StringType | TokenizedType | EnumeratedType
130#[tracable_parser]
131fn atttype(i: Span) -> Result<AttType> {
132    alt((
133        string_type,
134        tokenized_type,
135        map(enumerated_type, AttType::EnumeratedType),
136    ))(i)
137}
138
139/// EnumeratedType ::= NotationType | Enumeration
140#[derive(Clone, Debug, Display)]
141pub enum EnumeratedType {
142    #[display(fmt = "NOTATION ({})*", "_0")]
143    NotationType(NotationType),
144    #[display(fmt = "({})", "_0")]
145    Enumeration(Enumeration),
146}
147
148/// EnumeratedType ::= NotationType | Enumeration
149#[tracable_parser]
150fn enumerated_type(i: Span) -> Result<EnumeratedType> {
151    alt((
152        map(notation_type, EnumeratedType::NotationType),
153        map(enumeration, EnumeratedType::Enumeration),
154    ))(i)
155}
156
157/// NotationType   ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'  [VC: Notation Attributes]
158///                                                                       [VC: One Notation Per Element Type]
159///                                                                       [VC: No Notation on Empty Element]
160///                                                                       [VC: No Duplicate Tokens]
161#[derive(Clone, Debug, Display)]
162#[display(
163    fmt = "{}",
164    "_0.iter().map(|v|v.to_string()).collect::<Vec<_>>().join(\" | \")"
165)]
166pub struct NotationType(Vec<Name>);
167
168#[tracable_parser]
169fn notation_type(i: Span) -> Result<NotationType> {
170    map(
171        tuple((
172            tag("NOTATION"),
173            multispace1,
174            tag("("),
175            multispace0,
176            separated_list1(tuple((multispace0, tag("|"), multispace0)), name),
177            multispace0,
178            tag(")"),
179        )),
180        |(_, _, _, _, names, _, _)| NotationType(names),
181    )(i)
182}
183
184/// Enumeration    ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'         [VC: Enumeration]
185///                                                                       [VC: No Duplicate Tokens]
186#[derive(AsRef, AsMut, Clone, Debug, Display, Deref, DerefMut, IntoIterator)]
187#[display(
188    fmt = "{}",
189    "_0.iter().map(|v|v.to_string()).collect::<Vec<_>>().join(\" | \")"
190)]
191pub struct Enumeration(Vec<Nmtoken>);
192
193/// Enumeration    ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'         [VC: Enumeration]
194///                                                                       [VC: No Duplicate Tokens]
195#[tracable_parser]
196fn enumeration(i: Span) -> Result<Enumeration> {
197    map(
198        tuple((
199            tag("("),
200            multispace0,
201            separated_list1(tuple((multispace0, tag("|"), multispace0)), nmtoken),
202            multispace0,
203            tag(")"),
204        )),
205        |(_, _, tokens, _, _)| Enumeration(tokens),
206    )(i)
207}
208
209#[derive(Clone, Debug, Display)]
210pub enum DefaultDecl {
211    #[display(fmt = "#REQUIRED")]
212    Required,
213    #[display(fmt = "#IMPLIED")]
214    Implied,
215    #[display(fmt = "#FIXED {}", "_0")]
216    Fixed(AttValue),
217    #[display(fmt = "{}", "_0")]
218    Default(AttValue),
219}
220/// DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
221///                 | (('#FIXED' S)? AttValue) [VC: Required Attribute]
222///                                            [VC: Attribute Default Value Syntactically Correct]
223///                                            [WFC: No < in Attribute Values]
224///                                            [VC: Fixed Attribute Default]
225///                                            [WFC: No External Entity References]
226///
227/// <!ATTLIST termdef
228///           id      ID      #REQUIRED
229///           name    CDATA   #IMPLIED>
230/// <!ATTLIST list
231///           type    (bullets|ordered|glossary)  "ordered">
232/// <!ATTLIST form
233///           method  CDATA   #FIXED "POST">
234#[tracable_parser]
235fn default_decl(i: Span) -> Result<DefaultDecl> {
236    alt((
237        map(tag("#REQUIRED"), |_| DefaultDecl::Required),
238        map(tag("#IMPLIED"), |_| DefaultDecl::Implied),
239        map(
240            pair(opt(pair(tag("#FIXED"), multispace1)), attvalue),
241            |(isfixed, attvalue)| {
242                if isfixed.is_some() {
243                    DefaultDecl::Fixed(attvalue)
244                } else {
245                    DefaultDecl::Default(attvalue)
246                }
247            },
248        ),
249    ))(i)
250}
251
252#[derive(AsRef, AsMut, Clone, Debug, Display, Deref, DerefMut, IntoIterator)]
253#[display(
254    fmt = "{}",
255    "_0.iter().map(|v|v.to_string()).collect::<Vec<_>>().join(\" \")"
256)]
257pub struct AttValue(Vec<ValueOrReference>);
258
259#[derive(Clone, Debug, Display)]
260pub enum ValueOrReference {
261    Value(Value),
262    Reference(Reference),
263}
264
265#[derive(AsRef, AsMut, Clone, Debug, Display, Deref, DerefMut)]
266pub struct Value(String);
267
268/// AttValue ::= '"' ([^<&"] | Reference)* '"'
269///              |  "'" ([^<&'] | Reference)* "'"
270#[tracable_parser]
271fn attvalue(i: Span) -> Result<AttValue> {
272    map(
273        alt((
274            delimited(
275                char('"'),
276                many0(alt((
277                    map(is_not("<&\""), |v: Span| {
278                        ValueOrReference::Value(Value(v.to_string()))
279                    }),
280                    map(reference, |r| ValueOrReference::Reference(r)),
281                ))),
282                char('"'),
283            ),
284            delimited(
285                char('\''),
286                many0(alt((
287                    map(is_not("<&'"), |v: Span| {
288                        ValueOrReference::Value(Value(v.to_string()))
289                    }),
290                    map(reference, |r| ValueOrReference::Reference(r)),
291                ))),
292                char('\''),
293            ),
294        )),
295        |v| AttValue(v),
296    )(i)
297}
298
#[cfg(test)]
mod tests {
    use nom::Finish;

    use super::attlist_decl;
    use crate::span;

    /// Runs `attlist_decl` on `dtd` and fails the test, printing the parse
    /// error, when the declaration is rejected.
    fn assert_parses(dtd: &'static str) {
        let parsed = attlist_decl(span(dtd)).finish();
        assert!(parsed.is_ok(), "{:?}", parsed.as_ref().unwrap_err());
    }

    /// Two definitions using the `#REQUIRED` and `#IMPLIED` keywords.
    #[test]
    fn test_attlist_1() {
        assert_parses(
            r#"<!ATTLIST termdef
             id      ID      #REQUIRED
             name    CDATA   #IMPLIED>"#,
        );
    }

    /// An enumerated type with a plain quoted default value.
    #[test]
    fn test_attlist_2() {
        assert_parses(
            r#"<!ATTLIST list
             type    (bullets|ordered|glossary)  "ordered">"#,
        );
    }

    /// A `#FIXED` default value.
    #[test]
    fn test_attlist_3() {
        assert_parses(
            r#"<!ATTLIST form
             method  CDATA   #FIXED "POST">"#,
        );
    }

    /// Several definitions with uneven spacing and a blank line in between.
    #[test]
    fn test_attlist_4() {
        assert_parses(
            r#"<!ATTLIST document
     ids        NMTOKENS          #IMPLIED
    names      CDATA     #IMPLIED
    dupnames   CDATA     #IMPLIED
    source    CDATA              #IMPLIED
    classes    NMTOKENS   #IMPLIED

    title     CDATA     #IMPLIED>"#,
        );
    }
}