1use std::fmt::{self, Debug, Display, Formatter};
2
3use ecow::{EcoString, EcoVec};
4use typst_syntax::Span;
5use typst_utils::{PicoStr, ResolvedPicoStr};
6
7use crate::diag::{bail, HintedStrResult, StrResult};
8use crate::foundations::{cast, Dict, Repr, Str};
9use crate::introspection::{Introspector, Tag};
10use crate::layout::Frame;
11use crate::model::DocumentInfo;
12
/// An HTML document: a root element bundled with document-level data.
#[derive(Debug, Clone)]
pub struct HtmlDocument {
    /// The element at the root of the document's tree.
    pub root: HtmlElement,
    /// Document-level information (see [`DocumentInfo`]).
    pub info: DocumentInfo,
    /// The introspector stored alongside the document.
    // NOTE(review): presumably used to query the document's elements — confirm against callers.
    pub introspector: Introspector,
}
23
/// A single node in the HTML tree; this is the child type of [`HtmlElement`].
#[derive(Debug, Clone, Hash)]
pub enum HtmlNode {
    /// An introspection [`Tag`] carried inside the tree.
    Tag(Tag),
    /// Text content together with the span it is associated with.
    Text(EcoString, Span),
    /// A nested HTML element.
    Element(HtmlElement),
    /// A layout [`Frame`] embedded in the tree.
    // NOTE(review): how the frame is rendered is decided outside this file — confirm.
    Frame(Frame),
}
36
37impl HtmlNode {
38 pub fn text(text: impl Into<EcoString>, span: Span) -> Self {
40 Self::Text(text.into(), span)
41 }
42}
43
44impl From<HtmlElement> for HtmlNode {
45 fn from(element: HtmlElement) -> Self {
46 Self::Element(element)
47 }
48}
49
/// An HTML element: a tag with attributes, child nodes, and a source span.
#[derive(Debug, Clone, Hash)]
pub struct HtmlElement {
    /// The element's tag (for example one of the constants in [`tag`]).
    pub tag: HtmlTag,
    /// The attributes set on the element, in insertion order.
    pub attrs: HtmlAttrs,
    /// The nodes nested inside the element.
    pub children: Vec<HtmlNode>,
    /// The span associated with the element; detached when built via
    /// [`HtmlElement::new`].
    pub span: Span,
}
62
63impl HtmlElement {
64 pub fn new(tag: HtmlTag) -> Self {
66 Self {
67 tag,
68 attrs: HtmlAttrs::default(),
69 children: vec![],
70 span: Span::detached(),
71 }
72 }
73
74 pub fn with_children(mut self, children: Vec<HtmlNode>) -> Self {
78 self.children = children;
79 self
80 }
81
82 pub fn with_attr(mut self, key: HtmlAttr, value: impl Into<EcoString>) -> Self {
84 self.attrs.push(key, value);
85 self
86 }
87
88 pub fn spanned(mut self, span: Span) -> Self {
90 self.span = span;
91 self
92 }
93}
94
/// The tag of an HTML element, stored as an interned string ([`PicoStr`]).
///
/// Values are created through [`HtmlTag::intern`] or [`HtmlTag::constant`],
/// both of which validate the name.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct HtmlTag(PicoStr);
98
99impl HtmlTag {
100 pub fn intern(string: &str) -> StrResult<Self> {
102 if string.is_empty() {
103 bail!("tag name must not be empty");
104 }
105
106 if let Some(c) = string.chars().find(|&c| !charsets::is_valid_in_tag_name(c)) {
107 bail!("the character {} is not valid in a tag name", c.repr());
108 }
109
110 Ok(Self(PicoStr::intern(string)))
111 }
112
113 #[track_caller]
117 pub const fn constant(string: &'static str) -> Self {
118 if string.is_empty() {
119 panic!("tag name must not be empty");
120 }
121
122 let bytes = string.as_bytes();
123 let mut i = 0;
124 while i < bytes.len() {
125 if !bytes[i].is_ascii() || !charsets::is_valid_in_tag_name(bytes[i] as char) {
126 panic!("not all characters are valid in a tag name");
127 }
128 i += 1;
129 }
130
131 Self(PicoStr::constant(string))
132 }
133
134 pub fn resolve(self) -> ResolvedPicoStr {
136 self.0.resolve()
137 }
138
139 pub const fn into_inner(self) -> PicoStr {
141 self.0
142 }
143}
144
145impl Debug for HtmlTag {
146 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
147 Display::fmt(self, f)
148 }
149}
150
151impl Display for HtmlTag {
152 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
153 write!(f, "<{}>", self.resolve())
154 }
155}
156
157cast! {
158 HtmlTag,
159 self => self.0.resolve().as_str().into_value(),
160 v: Str => Self::intern(&v)?,
161}
162
/// The attributes of an HTML element, kept as an ordered list of
/// attribute/value pairs.
#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
pub struct HtmlAttrs(pub EcoVec<(HtmlAttr, EcoString)>);
166
167impl HtmlAttrs {
168 pub fn push(&mut self, attr: HtmlAttr, value: impl Into<EcoString>) {
170 self.0.push((attr, value.into()));
171 }
172}
173
174cast! {
175 HtmlAttrs,
176 self => self.0
177 .into_iter()
178 .map(|(key, value)| (key.resolve().as_str().into(), value.into_value()))
179 .collect::<Dict>()
180 .into_value(),
181 values: Dict => Self(values
182 .into_iter()
183 .map(|(k, v)| {
184 let attr = HtmlAttr::intern(&k)?;
185 let value = v.cast::<EcoString>()?;
186 Ok((attr, value))
187 })
188 .collect::<HintedStrResult<_>>()?),
189}
190
/// The name of an HTML attribute, stored as an interned string ([`PicoStr`]).
///
/// Values are created through [`HtmlAttr::intern`] or [`HtmlAttr::constant`],
/// both of which validate the name.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct HtmlAttr(PicoStr);
194
195impl HtmlAttr {
196 pub fn intern(string: &str) -> StrResult<Self> {
198 if string.is_empty() {
199 bail!("attribute name must not be empty");
200 }
201
202 if let Some(c) =
203 string.chars().find(|&c| !charsets::is_valid_in_attribute_name(c))
204 {
205 bail!("the character {} is not valid in an attribute name", c.repr());
206 }
207
208 Ok(Self(PicoStr::intern(string)))
209 }
210
211 #[track_caller]
218 pub const fn constant(string: &'static str) -> Self {
219 if string.is_empty() {
220 panic!("attribute name must not be empty");
221 }
222
223 let bytes = string.as_bytes();
224 let mut i = 0;
225 while i < bytes.len() {
226 if !bytes[i].is_ascii()
227 || !charsets::is_valid_in_attribute_name(bytes[i] as char)
228 {
229 panic!("not all characters are valid in an attribute name");
230 }
231 i += 1;
232 }
233
234 Self(PicoStr::constant(string))
235 }
236
237 pub fn resolve(self) -> ResolvedPicoStr {
239 self.0.resolve()
240 }
241
242 pub const fn into_inner(self) -> PicoStr {
244 self.0
245 }
246}
247
248impl Debug for HtmlAttr {
249 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
250 Display::fmt(self, f)
251 }
252}
253
254impl Display for HtmlAttr {
255 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
256 write!(f, "{}", self.resolve())
257 }
258}
259
260cast! {
261 HtmlAttr,
262 self => self.0.resolve().as_str().into_value(),
263 v: Str => Self::intern(&v)?,
264}
265
/// Character predicates used to validate HTML tag names, attribute names,
/// attribute values, and element text.
pub mod charsets {
    /// Whether `c` may appear in a tag name: ASCII letters and digits only.
    pub const fn is_valid_in_tag_name(c: char) -> bool {
        c.is_ascii_alphanumeric()
    }

    /// Whether `c` may appear in an attribute name.
    ///
    /// Rejected are NUL, space, `"`, `'`, `>`, `/`, `=`, every control
    /// character, and every noncharacter. Anything else is accepted.
    pub const fn is_valid_in_attribute_name(c: char) -> bool {
        let explicitly_forbidden = matches!(c, '\0' | ' ' | '"' | '\'' | '>' | '/' | '=');
        !(explicitly_forbidden || is_whatwg_control_char(c) || is_whatwg_non_char(c))
    }

    /// Whether `c` may appear in an attribute value as-is.
    ///
    /// `&` and `"` are rejected; everything else defers to
    /// [`is_w3c_text_char`].
    pub const fn is_valid_in_attribute_value(c: char) -> bool {
        if matches!(c, '&' | '"') {
            return false;
        }
        is_w3c_text_char(c)
    }

    /// Whether `c` may appear in the text of a normal element as-is.
    ///
    /// `&` and `<` are rejected; everything else defers to
    /// [`is_w3c_text_char`].
    pub const fn is_valid_in_normal_element_text(c: char) -> bool {
        if matches!(c, '&' | '<') {
            return false;
        }
        is_w3c_text_char(c)
    }

    /// Whether `c` counts as a text character.
    ///
    /// Noncharacters never qualify. Control characters qualify only when they
    /// are ASCII whitespace. All remaining characters qualify.
    pub const fn is_w3c_text_char(c: char) -> bool {
        if is_whatwg_non_char(c) {
            false
        } else if is_whatwg_control_char(c) {
            c.is_ascii_whitespace()
        } else {
            true
        }
    }

    /// Whether `c` is a noncharacter: U+FDD0..=U+FDEF, or a code point whose
    /// low 16 bits are `FFFE` or `FFFF`.
    const fn is_whatwg_non_char(c: char) -> bool {
        let code = c as u32;
        let in_fdd0_block = matches!(c, '\u{fdd0}'..='\u{fdef}');
        let ends_with_fffe_or_ffff = (code & 0xfffe) == 0xfffe && code <= 0x10ffff;
        in_fdd0_block || ends_with_fffe_or_ffff
    }

    /// Whether `c` is a control character: U+0000..=U+001F or
    /// U+007F..=U+009F.
    const fn is_whatwg_control_char(c: char) -> bool {
        matches!(c, '\u{00}'..='\u{1f}' | '\u{7f}'..='\u{9f}')
    }
}
348
349pub mod tag {
351 use super::HtmlTag;
352
353 macro_rules! tags {
354 ($($tag:ident)*) => {
355 $(#[allow(non_upper_case_globals)]
356 pub const $tag: HtmlTag = HtmlTag::constant(
357 stringify!($tag)
358 );)*
359 }
360 }
361
362 tags! {
363 a
364 abbr
365 address
366 area
367 article
368 aside
369 audio
370 b
371 base
372 bdi
373 bdo
374 blockquote
375 body
376 br
377 button
378 canvas
379 caption
380 cite
381 code
382 col
383 colgroup
384 data
385 datalist
386 dd
387 del
388 details
389 dfn
390 dialog
391 div
392 dl
393 dt
394 em
395 embed
396 fieldset
397 figcaption
398 figure
399 footer
400 form
401 h1
402 h2
403 h3
404 h4
405 h5
406 h6
407 head
408 header
409 hgroup
410 hr
411 html
412 i
413 iframe
414 img
415 input
416 ins
417 kbd
418 label
419 legend
420 li
421 link
422 main
423 map
424 mark
425 menu
426 meta
427 meter
428 nav
429 noscript
430 object
431 ol
432 optgroup
433 option
434 output
435 p
436 param
437 picture
438 pre
439 progress
440 q
441 rp
442 rt
443 ruby
444 s
445 samp
446 script
447 search
448 section
449 select
450 slot
451 small
452 source
453 span
454 strong
455 style
456 sub
457 summary
458 sup
459 table
460 tbody
461 td
462 template
463 textarea
464 tfoot
465 th
466 thead
467 time
468 title
469 tr
470 track
471 u
472 ul
473 var
474 video
475 wbr
476 }
477
478 pub fn is_void(tag: HtmlTag) -> bool {
481 matches!(
482 tag,
483 self::area
484 | self::base
485 | self::br
486 | self::col
487 | self::embed
488 | self::hr
489 | self::img
490 | self::input
491 | self::link
492 | self::meta
493 | self::param
494 | self::source
495 | self::track
496 | self::wbr
497 )
498 }
499
500 pub fn is_raw(tag: HtmlTag) -> bool {
502 matches!(tag, self::script | self::style)
503 }
504
505 pub fn is_escapable_raw(tag: HtmlTag) -> bool {
507 matches!(tag, self::textarea | self::title)
508 }
509
510 pub fn is_metadata(tag: HtmlTag) -> bool {
512 matches!(
513 tag,
514 self::base
515 | self::link
516 | self::meta
517 | self::noscript
518 | self::script
519 | self::style
520 | self::template
521 | self::title
522 )
523 }
524
525 pub fn is_block_by_default(tag: HtmlTag) -> bool {
528 matches!(
529 tag,
530 self::html
531 | self::head
532 | self::body
533 | self::article
534 | self::aside
535 | self::h1
536 | self::h2
537 | self::h3
538 | self::h4
539 | self::h5
540 | self::h6
541 | self::hgroup
542 | self::nav
543 | self::section
544 | self::dd
545 | self::dl
546 | self::dt
547 | self::menu
548 | self::ol
549 | self::ul
550 | self::address
551 | self::blockquote
552 | self::dialog
553 | self::div
554 | self::fieldset
555 | self::figure
556 | self::figcaption
557 | self::footer
558 | self::form
559 | self::header
560 | self::hr
561 | self::legend
562 | self::main
563 | self::p
564 | self::pre
565 | self::search
566 )
567 }
568
569 pub fn is_inline_by_default(tag: HtmlTag) -> bool {
580 matches!(
581 tag,
582 self::abbr
583 | self::a
584 | self::bdi
585 | self::b
586 | self::br
587 | self::bdo
588 | self::code
589 | self::cite
590 | self::dfn
591 | self::data
592 | self::i
593 | self::em
594 | self::mark
595 | self::kbd
596 | self::rp
597 | self::q
598 | self::ruby
599 | self::rt
600 | self::samp
601 | self::s
602 | self::span
603 | self::small
604 | self::sub
605 | self::strong
606 | self::time
607 | self::sup
608 | self::var
609 | self::u
610 )
611 }
612
613 pub fn is_tabular_by_default(tag: HtmlTag) -> bool {
616 matches!(
617 tag,
618 self::table
619 | self::thead
620 | self::tbody
621 | self::tfoot
622 | self::tr
623 | self::th
624 | self::td
625 | self::caption
626 | self::col
627 | self::colgroup
628 )
629 }
630}
631
632#[allow(non_upper_case_globals)]
636pub mod attr {
637 use super::HtmlAttr;
638
639 macro_rules! attrs {
640 ($($attr:ident)*) => {
641 $(#[allow(non_upper_case_globals)]
642 pub const $attr: HtmlAttr = HtmlAttr::constant(
643 stringify!($attr)
644 );)*
645 }
646 }
647
648 attrs! {
649 charset
650 cite
651 colspan
652 content
653 href
654 name
655 reversed
656 role
657 rowspan
658 start
659 style
660 value
661 }
662
663 pub const aria_level: HtmlAttr = HtmlAttr::constant("aria-level");
664}