1use crate::{bstr, Map, Val};
3use alloc::string::{String, ToString};
4use alloc::{borrow::ToOwned, boxed::Box, format, vec::Vec};
5use core::fmt::{self, Formatter};
6use std::io;
7use xmlparser::{ElementEnd, ExternalId, StrSpan, TextPos, Token, Tokenizer};
8
9pub fn parse_many(s: &str) -> impl Iterator<Item = Result<Val, PError>> + '_ {
11 let mut tokens = Tokenizer::from(s);
12 core::iter::from_fn(move || tokens.next().map(|tk| parse(tk?, &mut tokens)))
13}
14
/// Qualified tag name, stored as namespace prefix followed by local name.
///
/// The prefix may be empty; in that case, only the local name is displayed.
#[derive(Debug)]
struct Tag<'a>(StrSpan<'a>, StrSpan<'a>);
18
19impl PartialEq for Tag<'_> {
20 fn eq(&self, rhs: &Self) -> bool {
21 (self.0.as_str(), self.1.as_str()) == (rhs.0.as_str(), rhs.1.as_str())
22 }
23}
24
25impl fmt::Display for Tag<'_> {
26 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
27 if !self.0.is_empty() {
28 write!(f, "{}:", self.0)?;
29 }
30 write!(f, "{}", self.1)
31 }
32}
33
34impl Tag<'_> {
35 fn tag_pos(&self, tokens: &Tokenizer) -> TagPos {
36 let pos = tokens.stream().gen_text_pos_from(self.0.start());
37 TagPos(self.to_string(), pos)
38 }
39}
40
/// Displayed tag name together with a text position in the input.
///
/// Shown as `name (at position)`.
#[derive(Debug)]
pub struct TagPos(String, TextPos);
44
45impl fmt::Display for TagPos {
46 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
47 write!(f, "{} (at {})", self.0, self.1)
48 }
49}
50
/// Lex error: an opaque wrapper around the error reported by `xmlparser`.
#[derive(Debug)]
pub struct LError(xmlparser::Error);
54
/// Parse error.
#[derive(Debug)]
pub enum PError {
    /// The tokenizer failed to lex the input.
    Lex(LError),
    /// A closing tag (second field) does not match the open tag (first field).
    Unmatched(TagPos, TagPos),
    /// The input ended before the given tag (or DOCTYPE) was closed.
    Unclosed(TagPos),
}
65
66impl fmt::Display for PError {
67 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
68 match self {
69 Self::Lex(LError(e)) => e.fmt(f),
70 Self::Unmatched(open, close) => {
71 write!(f, "expected closing tag for {open}, found {close}")
72 }
73 Self::Unclosed(open) => {
74 write!(f, "expected closing tag for {open}, found end of file")
75 }
76 }
77 }
78}
79
80impl From<xmlparser::Error> for PError {
81 fn from(e: xmlparser::Error) -> Self {
82 Self::Lex(LError(e))
83 }
84}
85
// No methods are overridden here; the trait's provided defaults apply.
impl std::error::Error for PError {}
87
88fn parse_children(tag: &Tag, tokens: &mut Tokenizer) -> Result<Vec<Val>, PError> {
89 let mut children = Vec::new();
90 loop {
91 let Some(tk) = tokens.next() else {
92 return Err(PError::Unclosed(tag.tag_pos(tokens)));
93 };
94 match tk? {
95 Token::ElementEnd {
96 end: ElementEnd::Close(prefix, local),
97 ..
98 } => {
99 let tag_ = Tag(prefix, local);
100 if *tag == tag_ {
101 return Ok(children);
102 } else {
103 Err(PError::Unmatched(tag.tag_pos(tokens), tag_.tag_pos(tokens)))?
104 }
105 }
106 tk => children.push(parse(tk, tokens)?),
107 }
108 }
109}
110
111fn tac(tag: &Tag, tokens: &mut Tokenizer) -> Result<Val, PError> {
112 let mut attrs = Vec::new();
113 let children = loop {
114 let tk = tokens.next().unwrap();
116 match tk? {
117 Token::Attribute {
118 prefix,
119 local,
120 value,
121 ..
122 } => attrs.push((
123 Tag(prefix, local).to_string().into(),
124 value.as_str().to_owned().into(),
125 )),
126 Token::ElementEnd { end, .. } => match end {
127 ElementEnd::Open => break Some(parse_children(tag, tokens)?),
128 ElementEnd::Empty => break None,
129 ElementEnd::Close(..) => panic!(),
131 },
132 _ => panic!(),
134 }
135 };
136 let attrs = if attrs.is_empty() { None } else { Some(attrs) };
137
138 Ok(make_obj([
139 ("t", Some(tag.to_string().into())),
140 ("a", attrs.map(|v| Val::obj(v.into_iter().collect()))),
141 ("c", children.map(|v| v.into_iter().collect())),
142 ]))
143}
144
145fn doctype(name: &str, external: Option<ExternalId>, internal: Option<&str>) -> Val {
146 let external = external.map(|ext| match ext {
147 ExternalId::System(system) => format!("SYSTEM {system}"),
148 ExternalId::Public(pub_id, system) => format!("PUBLIC {pub_id} {system}"),
149 });
150 make_obj([
151 ("name", Some(name.to_owned())),
152 ("external", external),
153 ("internal", internal.map(|s| s.to_owned())),
154 ])
155}
156
157fn make_obj<T: Into<Val>, const N: usize>(arr: [(&str, Option<T>); N]) -> Val {
158 let iter = arr
159 .into_iter()
160 .flat_map(|(k, v)| v.map(|v| (k.to_owned().into(), v.into())));
161 Val::obj(iter.collect())
162}
163
164fn parse(tk: Token, tokens: &mut Tokenizer) -> Result<Val, PError> {
165 let ss_val = |ss: StrSpan| ss.as_str().to_owned().into();
166 let singleton = |k: &str, v| Val::obj(core::iter::once((k.to_string().into(), v)).collect());
167
168 Ok(match tk {
169 Token::Declaration {
170 version,
171 encoding,
172 standalone,
173 ..
174 } => singleton(
175 "xmldecl",
176 make_obj([
177 ("version", Some(ss_val(version))),
178 ("encoding", encoding.map(ss_val)),
179 ("standalone", standalone.map(|b| b.into())),
180 ]),
181 ),
182 Token::ProcessingInstruction {
183 target, content, ..
184 } => singleton(
185 "pi",
186 make_obj([
187 ("target", Some(ss_val(target))),
188 ("content", content.map(ss_val)),
189 ]),
190 ),
191 Token::Cdata { text, .. } => singleton("cdata", ss_val(text)),
192 Token::Comment { text, .. } => singleton("comment", ss_val(text)),
193 Token::ElementStart { prefix, local, .. } => tac(&Tag(prefix, local), tokens)?,
194 Token::Text { text } => ss_val(text),
195 Token::Attribute { .. }
197 | Token::DtdEnd { .. }
198 | Token::ElementEnd { .. }
199 | Token::EntityDeclaration { .. } => panic!(),
200 Token::DtdStart {
201 name,
202 external_id,
203 span,
204 } => {
205 let internal = loop {
206 let Some(tk) = tokens.next() else {
207 let pos = tokens.stream().gen_text_pos_from(span.start());
208 Err(PError::Unclosed(TagPos("DOCTYPE".into(), pos)))?
209 };
210 if let Token::DtdEnd { span: span_ } = tk? {
211 break &tokens.stream().span().as_str()[span.end()..span_.start()];
212 }
213 };
214 singleton("doctype", doctype(&name, external_id, Some(internal)))
215 }
216 Token::EmptyDtd {
217 name, external_id, ..
218 } => singleton("doctype", doctype(&name, external_id, None)),
219 })
220}
221
/// Serialisation error: a value does not have the shape of an XML value.
#[derive(Debug)]
pub enum SError {
    /// An object of the given kind (first field) contains
    /// an entry with an unexpected key (second field) or value (third field).
    InvalidEntry(&'static str, Val, Val),
    /// An object with exactly one entry was expected, but this value was found.
    SingletonObj(Val),
}
230
231impl fmt::Display for SError {
232 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
233 match self {
234 Self::InvalidEntry(o, k, v) => {
235 write!(f, "invalid entry in {o} object: {{\"{k}\": {v}}}")
236 }
237 Self::SingletonObj(v) => write!(f, "expected singleton object, found: {v}"),
238 }
239 }
240}
241
// No methods are overridden here; the trait's provided defaults apply.
impl std::error::Error for SError {}
243
/// XML value, parameterised over the string type `S`.
pub enum Xml<S> {
    /// XML declaration with its attributes, written as `<?xml k="v" ...?>`
    XmlDecl(Vec<(S, S)>),
    /// Document type declaration, written as `<!DOCTYPE name external [internal]>`
    DocType {
        /// name that follows `<!DOCTYPE `
        name: S,
        /// external part, written after the name if present
        external: Option<S>,
        /// internal part, written between `[` and `]` if present
        internal: Option<S>,
    },
    /// Processing instruction, written as `<?target content?>`
    Pi {
        /// target that follows `<?`
        target: S,
        /// content, written after the target if present
        content: Option<S>,
    },
    /// Element consisting of Tag, Attributes, and Children.
    ///
    /// Without children, the element is written self-closing, i.e. `<t .../>`;
    /// with children, it is written as `<t ...>children</t>`.
    Tac(S, Vec<(S, S)>, Option<Box<Self>>),
    /// Sequence of XML values, written one after another
    Seq(Vec<Self>),
    /// Scalar value; strings are written as they are, other values via `Display`
    Scalar(Val),
    /// Character data, written as `<![CDATA[...]]>`
    Cdata(S),
    /// Comment, written as `<!--...-->`
    Comment(S),
}
277
278impl<'a> TryFrom<&'a Val> for Xml<&'a [u8]> {
279 type Error = SError;
280 fn try_from(v: &'a Val) -> Result<Self, Self::Error> {
281 use jaq_std::ValT;
282 let from_kv = |(k, v): (&'a _, &'a _)| match (k, v) {
283 (Val::Str(k, _), Val::Str(v, _)) => Ok((&**k, &**v)),
284 _ => Err(SError::InvalidEntry("attribute", k.clone(), v.clone())),
285 };
286 let from_kvs = |a: &'a Map| a.iter().map(from_kv).collect::<Result<_, _>>();
287
288 let from_tac = |o: &'a Map| {
289 let mut t = &b""[..];
290 let mut a = Vec::new();
291 let mut c = None;
292 for (k, v) in o.iter() {
293 let fail = || SError::InvalidEntry("tac", k.clone(), v.clone());
294 let k = k.as_utf8_bytes().ok_or_else(fail)?;
295 match (k, v) {
296 (b"t", Val::Str(s, _)) => t = s,
297 (b"a", Val::Obj(attrs)) => a = from_kvs(attrs)?,
298 (b"c", v) => c = Some(Box::new(v.try_into()?)),
299 _ => Err(fail())?,
300 }
301 }
302 Ok(Self::Tac(t, a, c))
303 };
304 let from_dt = |o: &'a Map| {
305 let mut name = &b""[..];
306 let mut external = None;
307 let mut internal = None;
308 for (k, v) in o.iter() {
309 let fail = || SError::InvalidEntry("doctype", k.clone(), v.clone());
310 let k = k.as_utf8_bytes().ok_or_else(fail)?;
311 match (k, v) {
312 (b"name", Val::Str(s, _)) => name = s,
313 (b"external", Val::Str(s, _)) => external = Some(&**s),
314 (b"internal", Val::Str(s, _)) => internal = Some(&**s),
315 _ => Err(fail())?,
316 }
317 }
318 Ok(Self::DocType {
319 name,
320 external,
321 internal,
322 })
323 };
324 let from_pi = |o: &'a Map| {
325 let mut target = &b""[..];
326 let mut content = None;
327 for (k, v) in o.iter() {
328 let fail = || SError::InvalidEntry("pi", k.clone(), v.clone());
329 let k = k.as_utf8_bytes().ok_or_else(fail)?;
330 match (k, v) {
331 (b"target", Val::Str(s, _)) => target = s,
332 (b"content", Val::Str(s, _)) => content = Some(&**s),
333 _ => Err(fail())?,
334 }
335 }
336 Ok(Self::Pi { target, content })
337 };
338 let contains_key = |o: &Map, k: &str| o.contains_key(&Val::from(k.to_string()));
339 match v {
340 Val::Arr(a) => a
341 .iter()
342 .map(TryInto::try_into)
343 .collect::<Result<_, _>>()
344 .map(Self::Seq),
345 Val::Obj(o) if contains_key(o, "t") => from_tac(o),
346 Val::Obj(o) => {
347 let mut o = o.iter();
348 let (k, v) = match (o.next(), o.next()) {
349 (Some(kv), None) => kv,
350 _ => Err(SError::SingletonObj(v.clone()))?,
351 };
352 let fail = || SError::InvalidEntry("unknown", k.clone(), v.clone());
353 let k = k.as_utf8_bytes().ok_or_else(fail)?;
354 match (k, v) {
355 (b"xmldecl", Val::Obj(kvs)) => from_kvs(kvs).map(Self::XmlDecl),
356 (b"doctype", Val::Obj(o)) if contains_key(o, "name") => from_dt(o),
357 (b"cdata", Val::Str(s, _)) => Ok(Self::Cdata(s)),
358 (b"comment", Val::Str(s, _)) => Ok(Self::Comment(s)),
359 (b"pi", Val::Obj(o)) if contains_key(o, "target") => from_pi(o),
360 _ => Err(fail())?,
361 }
362 }
363 Val::Null | Val::Bool(_) | Val::Num(_) | Val::Str(..) => Ok(Self::Scalar(v.clone())),
364 }
365 }
366}
367
// Write every key-value pair of `$a` as ` key="value"` to the writer `$w`.
//
// `$f` is an expression (in this file, a closure) that writes a single string;
// it is pasted at each use and applied to the key and to the value.
// The macro evaluates to the result of `try_for_each`,
// so it stops at the first write that fails.
macro_rules! write_kvs {
    ($w:ident, $a:ident, $f:expr) => {{
        $a.iter().try_for_each(|(k, v)| {
            write!($w, " ")?;
            $f(k)?;
            write!($w, "=\"")?;
            $f(v)?;
            write!($w, "\"")
        })
    }};
}
379
// Write the XML value `$v` to the writer `$w`.
//
// `$fs` is an expression that writes a single string and
// `$fv` is an expression that writes a nested XML value;
// both are pasted at each use.
// The macro uses `?` and `Self`, so it has to be expanded inside a method of
// `Xml` that returns the result type of the writer.
macro_rules! write_val {
    ($w:ident, $v:ident, $fs:expr, $fv:expr) => {{
        match $v {
            // strings are written as they are, all other scalars via `Display`
            Xml::Scalar(Val::Str(s, _)) => $fs(s),
            Xml::Scalar(v) => write!($w, "{v}"),
            Xml::Seq(a) => a.iter().try_for_each($fv),
            Xml::Tac(t, a, c) => {
                write!($w, "<")?;
                $fs(t)?;
                write_kvs!($w, a, $fs)?;
                // an element without children is written self-closing
                if let Some(c) = c {
                    write!($w, ">")?;
                    $fv(c)?;
                    write!($w, "</")?;
                    $fs(t)?;
                    write!($w, ">")
                } else {
                    write!($w, "/>")
                }
            }
            Xml::XmlDecl(a) => {
                write!($w, "<?xml")?;
                write_kvs!($w, a, $fs)?;
                write!($w, "?>")
            }
            Self::DocType {
                name,
                external,
                internal,
            } => {
                write!($w, "<!DOCTYPE ")?;
                $fs(name)?;
                if let Some(s) = external {
                    write!($w, " ")?;
                    $fs(s)?;
                }
                if let Some(s) = internal {
                    write!($w, " [")?;
                    $fs(s)?;
                    write!($w, "]")?;
                }
                write!($w, ">")
            }
            Self::Cdata(s) => {
                write!($w, "<![CDATA[")?;
                $fs(s)?;
                write!($w, "]]>")
            }
            Self::Comment(s) => {
                write!($w, "<!--")?;
                $fs(s)?;
                write!($w, "-->")
            }
            Self::Pi { target, content } => {
                write!($w, "<?")?;
                $fs(target)?;
                if let Some(s) = content {
                    write!($w, " ")?;
                    $fs(s)?;
                }
                write!($w, "?>")
            }
        }
    }};
}
445
impl fmt::Display for Xml<&[u8]> {
    /// Format the XML value as text.
    ///
    /// Strings are formatted via `bstr`, and
    /// nested XML values via their own `fmt`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // the closures are pasted into the macro body at every use,
        // so each of them borrows `f` only for the call it is used in
        write_val!(f, self, |s| bstr(s).fmt(f), |v: &Self| v.fmt(f))
    }
}
451
impl Xml<&[u8]> {
    /// Write the XML value as bytes to `w`.
    ///
    /// Strings are written via `write_all`, and
    /// nested XML values by calling this function recursively.
    /// Returns the first I/O error reported by the writer, if any.
    pub fn write(&self, w: &mut dyn io::Write) -> io::Result<()> {
        // the closures are pasted into the macro body at every use,
        // so each of them borrows `w` only for the call it is used in
        write_val!(w, self, |s: &[u8]| w.write_all(s), |v: &Self| v.write(w))
    }
}
457}