1#![warn(missing_docs)]
41#![warn(clippy::all)]
42#![allow(clippy::module_name_repetitions)]
43
44mod types;
45mod parser;
46mod certification;
47mod summary;
48mod security;
49
50pub use types::*;
51pub use parser::SemanticDOM;
52pub use certification::{AgentCertification, CertificationLevel, ValidationCheck};
53pub use summary::{to_agent_summary, to_one_liner, to_nav_summary, to_audio_summary, compare_token_usage, TokenComparison};
54pub use security::{validate_url, SecurityConfig};
55
56use thiserror::Error;
57
58#[derive(Error, Debug)]
60pub enum Error {
61 #[error("Input exceeds maximum size of {max_size} bytes (got {actual_size})")]
63 InputTooLarge {
64 max_size: usize,
66 actual_size: usize,
68 },
69
70 #[error("URL has disallowed protocol: {protocol}")]
72 InvalidUrlProtocol {
73 protocol: String,
75 },
76
77 #[error("Failed to parse HTML: {0}")]
79 ParseError(String),
80
81 #[error("IO error: {0}")]
83 IoError(#[from] std::io::Error),
84}
85
86pub type Result<T> = std::result::Result<T, Error>;
88
/// Options passed to `SemanticDOM::parse`.
///
/// [`Config::default`] supplies a ready-to-use value; individual fields can be
/// overridden with struct-update syntax
/// (`Config { max_input_size: 100, ..Default::default() }`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
    /// Maximum accepted input size in bytes. Larger input is rejected with
    /// [`Error::InputTooLarge`].
    pub max_input_size: usize,
    /// Prefix for identifiers. Presumably prepended to generated node IDs;
    /// the consuming code is outside this file, so confirm against the parser.
    pub id_prefix: String,
    /// Depth limit. Presumably the deepest element nesting the parser will
    /// descend into; confirm against the parser.
    pub max_depth: usize,
    /// Tag names to exclude. Presumably elements with these names are skipped
    /// during parsing; confirm against the parser.
    pub exclude_tags: Vec<String>,
    /// Whether a state graph is included in the output. The exact effect is
    /// decided by code outside this file.
    pub include_state_graph: bool,
    /// Whether validation is performed. NOTE(review): what gets validated is
    /// decided by code outside this file.
    pub validate: bool,
}
105
106impl Default for Config {
107 fn default() -> Self {
108 Self {
109 max_input_size: 10 * 1024 * 1024, id_prefix: "sdom".to_string(),
111 max_depth: 50,
112 exclude_tags: vec![
113 "script".to_string(),
114 "style".to_string(),
115 "noscript".to_string(),
116 "template".to_string(),
117 ],
118 include_state_graph: true,
119 validate: true,
120 }
121 }
122}
123
/// Identifier string of the specification draft this crate is associated with.
///
/// NOTE(review): where this value is surfaced (for example in certification
/// output) is not visible in this file.
pub const STANDARD: &str = "ISO/IEC-SDOM-SSG-DRAFT-2024";
126
127pub const VERSION: &str = env!("CARGO_PKG_VERSION");
129
#[cfg(test)]
mod tests {
    use super::*;

    // A page containing a <nav> and a <main> must produce at least one
    // landmark and at least one interactable element.
    #[test]
    fn test_parse_simple_html() {
        let page = r##"
            <html>
                <body>
                    <nav>
                        <a href="#home">Home</a>
                        <a href="#about">About</a>
                    </nav>
                    <main>
                        <h1>Welcome</h1>
                        <button>Click me</button>
                    </main>
                </body>
            </html>
        "##;

        let dom = SemanticDOM::parse(page, Config::default())
            .expect("sample page should parse");

        assert!(!dom.landmarks.is_empty());
        assert!(!dom.interactables.is_empty());
    }

    // After parsing a single <button>, the index must contain a node whose
    // role is `SemanticRole::Button`.
    #[test]
    fn test_o1_lookup() {
        let page = r#"<html><body><button id="test-btn">Test</button></body></html>"#;
        let dom = SemanticDOM::parse(page, Config::default()).unwrap();

        let has_button = dom
            .index
            .values()
            .any(|node| node.role == SemanticRole::Button);
        assert!(has_button);
    }

    // The agent summary must contain both section headings.
    #[test]
    fn test_agent_summary() {
        let page = r##"
            <html>
                <body>
                    <nav><a href="#home">Home</a></nav>
                    <main><button>Submit</button></main>
                </body>
            </html>
        "##;

        let dom = SemanticDOM::parse(page, Config::default()).unwrap();
        let summary = dom.to_agent_summary();

        for &heading in &["LANDMARKS:", "ACTIONS:"] {
            assert!(summary.contains(heading), "summary is missing {}", heading);
        }
    }

    // Input of 200 bytes against a 100-byte limit must be rejected with
    // `Error::InputTooLarge`.
    #[test]
    fn test_input_size_limit() {
        let tiny_limit = Config {
            max_input_size: 100,
            ..Config::default()
        };
        let oversized = "x".repeat(200);

        let outcome = SemanticDOM::parse(&oversized, tiny_limit);

        assert!(matches!(outcome, Err(Error::InputTooLarge { .. })));
    }

    // `validate_url` accepts http(s), file and relative URLs and rejects
    // `javascript:` and `data:` URLs.
    #[test]
    fn test_url_validation() {
        let accepted = [
            "https://example.com",
            "http://example.com",
            "file:///path/to/file",
            "/relative/path",
        ];
        for &url in &accepted {
            assert!(validate_url(url).is_ok(), "expected {} to be accepted", url);
        }

        let rejected = ["javascript:alert(1)", "data:text/html,<script>"];
        for &url in &rejected {
            assert!(validate_url(url).is_err(), "expected {} to be rejected", url);
        }
    }
}