1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
use crate::xml::common::{is_name_char, is_name_start_char};
use crate::xml::reader::events::XmlEvent;
use crate::xml::reader::lexer::Token;
use crate::xml::reader::parser::{
DeclarationSubstate, ParserOutcome, ProcessingInstructionSubstate, PullParser, State,
};
impl PullParser {
    /// Handles one lexer token while the parser is inside a processing
    /// instruction (`<?name data?>`).
    ///
    /// * `t` - the token to process.
    /// * `s` - which part of the processing instruction is being parsed:
    ///   `PIInsideName` while the target name is accumulated in `self.buf`,
    ///   `PIInsideData` while the free-form data is accumulated in `self.buf`.
    ///
    /// Returns `None` when a data token has merely been appended to the
    /// buffer; every other path returns whatever the state-transition helper
    /// or the `self_error!` macro produces (an emitted
    /// `XmlEvent::ProcessingInstruction`, a state change, or an error).
    ///
    /// Target names equal to `xml` in any letter case are reserved (section
    /// 2.6 of the XML spec). The only accepted use is the exact lowercase
    /// `xml` followed by whitespace before any element or declaration has been
    /// seen, which switches the parser to XML-declaration parsing.
    pub fn inside_processing_instruction(
        &mut self,
        t: Token,
        s: ProcessingInstructionSubstate,
    ) -> Option<ParserOutcome> {
        match s {
            ProcessingInstructionSubstate::PIInsideName => match t {
                // The first character must be a name-start character; every
                // following one only needs to be a name character.
                Token::Character(c)
                    if if self.buf_has_data() {
                        is_name_char(c)
                    } else {
                        is_name_start_char(c)
                    } =>
                {
                    self.append_char_continue(c)
                }
                Token::ProcessingInstructionEnd => {
                    // self.buf contains PI name
                    let name = self.take_buf();
                    // Don't need to check for declaration because it has mandatory attributes
                    // but there is none
                    match &name[..] {
                        // Name is empty, it is an error
                        "" => Some(
                            self_error!(self; "Encountered processing instruction without name"),
                        ),
                        // Found <?xml-like PI without any data: it can be neither a valid
                        // declaration nor a valid PI - see section 2.6 of XML 1.1 spec
                        n if n.eq_ignore_ascii_case("xml") => {
                            Some(self_error!(self; "Invalid processing instruction: <?{}", name))
                        }
                        // All is ok, emitting event
                        _ => self.as_state_emit(
                            State::OutsideTag,
                            Ok(XmlEvent::ProcessingInstruction { name, data: None }),
                        ),
                    }
                }
                Token::Whitespace(_) => {
                    // self.buf contains PI name
                    let name = self.take_buf();
                    match &name[..] {
                        // Whitespace directly after `<?`: there is no target name at all.
                        // Rejected here for consistency with the `?>` branch above.
                        "" => Some(
                            self_error!(self; "Encountered processing instruction without name"),
                        ),
                        // We have not ever encountered an element and have not parsed XML declaration
                        "xml" if !self.encountered_element && !self.parsed_declaration => self
                            .as_state_continue(State::InsideDeclaration(
                                DeclarationSubstate::BeforeVersion,
                            )),
                        // Any other <?xml-like PI is an error regardless of position: either it
                        // is a lowercase `xml` after the beginning of the document, or it is a
                        // mixed/upper-case variant, which is reserved everywhere -
                        // see section 2.6 of XML 1.1 spec
                        n if n.eq_ignore_ascii_case("xml") => {
                            Some(self_error!(self; "Invalid processing instruction: <?{}", name))
                        }
                        // All is ok, starting parsing PI data
                        _ => {
                            self.lexer.disable_errors(); // data is arbitrary, so disable errors
                            self.data.name = name;
                            self.as_state_continue(State::InsideProcessingInstruction(
                                ProcessingInstructionSubstate::PIInsideData,
                            ))
                        }
                    }
                }
                // Anything else cannot appear inside a PI target name.
                _ => Some(self_error!(self; "Unexpected token: <?{}{}", self.buf, t)),
            },
            ProcessingInstructionSubstate::PIInsideData => match t {
                Token::ProcessingInstructionEnd => {
                    // Data is finished: restore lexer error reporting that was
                    // switched off when data parsing started.
                    self.lexer.enable_errors();
                    let name = self.data.take_name();
                    let data = self.take_buf();
                    self.as_state_emit(
                        State::OutsideTag,
                        Ok(XmlEvent::ProcessingInstruction {
                            name,
                            data: Some(data),
                        }),
                    )
                }
                // Any other token should be treated as plain characters
                _ => {
                    t.push_to_string(&mut self.buf);
                    None
                }
            },
        }
    }
}