Skip to main content

node_html_parser/parser/
types.rs

//! Data type definitions used by the parser.
2
3use crate::dom::void_tag::VoidTagOptions;
4use std::collections::HashMap;
5
6/// 🔥 零拷贝版本 - 避免字符串分配,使用生命周期借用
7#[derive(Debug, Clone)]
8pub struct ZeroCopyTagMatch<'a> {
9	pub start: usize,
10	pub end: usize,
11	pub is_comment: bool,
12	pub is_closing: bool,
13	pub tag_name: &'a str,
14	pub attrs: &'a str,
15	pub self_closing: bool,
16}
17
18#[derive(Debug, Clone)]
19pub struct Options {
20	pub lower_case_tag_name: bool,
21	pub comment: bool,
22	/// Corresponds to js option fixNestedATags
23	pub fix_nested_a_tags: bool,
24	/// Parse not-closed tags (do not attempt JS style repair) -> corresponds to parseNoneClosedTags
25	pub parse_none_closed_tags: bool,
26	/// When true, preserve tag nesting as-is and skip JS-style auto-closing repairs
27	/// (corresponds to JS option `preserveTagNesting`).
28	pub preserve_tag_nesting: bool,
29	pub block_text_elements: HashMap<String, bool>, // tag -> ignore inner html when true
30	/// When true, even if block_text_elements requests extraction for script/style, we suppress
31	/// creating the inner raw Text node (used by tests expecting empty script/style by default).
32	pub suppress_script_style_text: bool,
33	pub void_tag: VoidTagOptions,
34}
35
36impl Default for Options {
37	fn default() -> Self {
38		let mut block = HashMap::new();
39		// 默认:script/style/noscript/pre 都作为 block 文本元素,捕获其原始文本(不解析内部标签)
40		block.insert("script".into(), true);
41		block.insert("style".into(), true);
42		block.insert("noscript".into(), true);
43		block.insert("pre".into(), true);
44		Self {
45			lower_case_tag_name: false,
46			comment: false,
47			fix_nested_a_tags: false,
48			parse_none_closed_tags: false,
49			preserve_tag_nesting: false,
50			block_text_elements: block,
51			suppress_script_style_text: false,
52			void_tag: Default::default(),
53		}
54	}
55}
56
57#[derive(Clone)]
58pub(crate) struct StackEntry {
59	pub elem: Box<crate::dom::element::HTMLElement>,
60}