1use std::collections::HashMap;
4use std::fmt;
5use std::sync::Arc;
6use std::time::Duration;
7
8use serde_json::Value;
9use servo::accesskit::{Node, NodeId};
10
11use crate::error::Error;
12use crate::net::sanitize_user_agent;
13
/// Outcome of fetching one page: the captured markup plus the side data
/// (layout, console output, screenshot, ...) that came back with it.
///
/// Fields marked `#[serde(skip)]` never appear in serialized output; fields
/// marked `skip_serializing_if` are left out when they are `None`.
#[derive(Debug, Clone, Default, serde::Serialize)]
#[non_exhaustive]
pub struct Page {
    /// HTML markup of the page; parsed for the title and used as the base
    /// input of the extraction helpers.
    pub html: String,
    /// Text content of the page. Empty when the engine reported none; for
    /// PDF responses it holds the text extracted from the PDF.
    pub inner_text: String,
    /// Document title taken from the `<title>` element, or `None` when no
    /// non-empty title was found.
    pub title: Option<String>,
    /// Layout data as a JSON string, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layout_json: Option<String>,
    /// Visibility data as a JSON string; internal, only fed to extraction.
    #[serde(skip)]
    visibility_json: Option<String>,
    /// JavaScript result string, when available.
    // NOTE(review): presumably only populated for JavaScript-mode fetches — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub js_result: Option<String>,
    /// Console messages, in the order the engine delivered them.
    pub console_messages: Vec<ConsoleMessage>,
    /// Accessibility tree rendered as a string, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub accessibility_tree: Option<String>,
    /// Value produced by applying the fetch's extraction schema to `html`;
    /// `None` when no schema was supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extracted: Option<Value>,
    /// PNG-encoded screenshot bytes; read through [`Page::screenshot_png`].
    #[serde(skip)]
    screenshot_png: Option<Vec<u8>>,
    /// Accessibility nodes keyed by id, behind an `Arc` so cloning the page
    /// does not copy the map.
    #[serde(skip)]
    a11y: Option<Arc<HashMap<NodeId, Node>>>,
    /// Visibility policy handed to the extraction helpers.
    #[serde(skip)]
    visibility_policy: crate::visibility::VisibilityPolicy,
}
51
52impl Page {
53 pub fn markdown(&self) -> crate::error::Result<String> {
55 self.markdown_with_url("")
56 }
57
58 pub fn markdown_with_url(&self, url: &str) -> crate::error::Result<String> {
60 Ok(crate::extract::extract_text(&self.extract_input(url, None))?)
61 }
62
63 pub fn extract_json(&self) -> crate::error::Result<String> {
65 self.extract_json_with_url("")
66 }
67
68 pub fn extract_json_with_url(&self, url: &str) -> crate::error::Result<String> {
70 Ok(crate::extract::extract_json(&self.extract_input(url, None))?)
71 }
72
73 pub fn markdown_with_selector(&self, url: &str, selector: &str) -> crate::error::Result<String> {
75 Ok(crate::extract::extract_text(&self.extract_input(url, Some(selector)))?)
76 }
77
78 pub fn extract_json_with_selector(&self, url: &str, selector: &str) -> crate::error::Result<String> {
80 Ok(crate::extract::extract_json(&self.extract_input(url, Some(selector)))?)
81 }
82
83 #[must_use]
85 pub fn screenshot_png(&self) -> Option<&[u8]> {
86 self.screenshot_png.as_deref()
87 }
88
89 fn extract_input<'a>(&'a self, url: &'a str, selector: Option<&'a str>) -> crate::extract::ExtractInput<'a> {
90 crate::extract::ExtractInput::new(&self.html, url)
91 .with_layout_json(self.layout_json.as_deref())
92 .with_visibility_json(self.visibility_json.as_deref())
93 .with_a11y(self.a11y.as_deref())
94 .with_inner_text(Some(&self.inner_text))
95 .with_selector(selector)
96 .with_visibility(self.visibility_policy)
97 }
98
99 pub(crate) fn from_servo(page: crate::bridge::ServoPage) -> Self {
100 let title = {
101 let doc = dom_query::Document::from(page.html.as_str());
102 let t = doc.select("title").text().to_string();
103 if t.is_empty() { None } else { Some(t) }
104 };
105 let screenshot_png = page.screenshot.and_then(|img| {
106 let mut buf = std::io::Cursor::new(Vec::new());
107 img.write_to(&mut buf, image::ImageFormat::Png).ok()?;
108 Some(buf.into_inner())
109 });
110 Self {
111 html: page.html,
112 inner_text: page.inner_text.unwrap_or_default(),
113 title,
114 layout_json: page.layout_json,
115 visibility_json: page.visibility_json,
116 js_result: page.js_result,
117 console_messages: page.console_messages.into_iter().map(ConsoleMessage::from).collect(),
118 screenshot_png,
119 accessibility_tree: page.accessibility_tree,
120 a11y: page.a11y.map(Arc::new),
121 extracted: None,
122 visibility_policy: crate::visibility::VisibilityPolicy::default(),
123 }
124 }
125}
126
/// One console message captured while the page was loaded.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
#[non_exhaustive]
pub struct ConsoleMessage {
    /// Severity the message was logged at.
    pub level: ConsoleLevel,
    /// Message text as delivered by the engine.
    pub message: String,
}
136
/// Severity of a [`ConsoleMessage`].
///
/// Serialized as the lowercase variant name (`"log"`, `"warn"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ConsoleLevel {
    /// The `log` level.
    Log,
    /// The `debug` level.
    Debug,
    /// The `info` level.
    Info,
    /// The `warn` level.
    Warn,
    /// The `error` level.
    Error,
    /// The `trace` level.
    Trace,
}
155
156impl ConsoleLevel {
157 #[must_use]
159 pub fn as_str(&self) -> &'static str {
160 match self {
161 Self::Log => "log",
162 Self::Debug => "debug",
163 Self::Info => "info",
164 Self::Warn => "warn",
165 Self::Error => "error",
166 Self::Trace => "trace",
167 }
168 }
169}
170
171impl fmt::Display for ConsoleLevel {
172 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
173 f.pad(self.as_str())
174 }
175}
176
177impl From<crate::bridge::ConsoleLevel> for ConsoleLevel {
178 fn from(level: crate::bridge::ConsoleLevel) -> Self {
179 use crate::bridge::ConsoleLevel as Bridge;
180 match level {
181 Bridge::Log => Self::Log,
182 Bridge::Debug => Self::Debug,
183 Bridge::Info => Self::Info,
184 Bridge::Warn => Self::Warn,
185 Bridge::Error => Self::Error,
186 Bridge::Trace => Self::Trace,
187 }
188 }
189}
190
191impl From<crate::bridge::ConsoleMessage> for ConsoleMessage {
192 fn from(msg: crate::bridge::ConsoleMessage) -> Self {
193 Self {
194 level: msg.level.into(),
195 message: msg.message,
196 }
197 }
198}
199
/// What a fetch should produce; translated into the engine's own mode when
/// the request is handed to the bridge.
#[derive(Debug, Clone, Default)]
pub(crate) enum FetchMode {
    /// Plain content fetch. This is the default and the only mode in which
    /// the PDF probe runs before the engine is involved.
    #[default]
    Content,
    /// Screenshot capture.
    Screenshot {
        // NOTE(review): presumably whole page vs. viewport only; the flag is
        // forwarded to the engine untouched — confirm there.
        full_page: bool,
    },
    /// Evaluate the contained JavaScript expression in the page.
    JavaScript(String),
}
209
/// Settings for a single fetch, assembled by chaining the setter methods
/// on top of one of the constructors.
#[must_use = "options do nothing until passed to fetch()"]
#[derive(Debug, Clone)]
pub struct FetchOptions {
    /// URL to fetch.
    pub(crate) url: String,
    /// Caller-supplied timeout; `None` means "use `DEFAULT_TIMEOUT`".
    pub(crate) timeout: Option<Duration>,
    /// Caller-supplied settle delay; `None` means "use `DEFAULT_SETTLE`".
    pub(crate) settle: Option<Duration>,
    /// What the fetch should produce.
    pub(crate) mode: FetchMode,
    /// User-agent override, already sanitized by the setter.
    pub(crate) user_agent: Option<String>,
    /// Schema applied to the fetched HTML to fill `Page::extracted`.
    pub(crate) extract_schema: Option<crate::schema::ExtractSchema>,
    /// Visibility policy override; `None` means "use the policy's default".
    pub(crate) visibility: Option<crate::visibility::VisibilityPolicy>,
    /// Cookies forwarded to the engine with the request.
    pub(crate) cookies: Vec<crate::cookies::CookieSpec>,
}
223
224impl FetchOptions {
225 pub(crate) const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
228
229 pub(crate) const DEFAULT_SETTLE: Duration = Duration::ZERO;
232
233 pub fn new(url: &str) -> Self {
235 Self {
236 url: url.into(),
237 timeout: None,
238 settle: None,
239 mode: FetchMode::Content,
240 user_agent: None,
241 extract_schema: None,
242 visibility: None,
243 cookies: Vec::new(),
244 }
245 }
246
247 pub fn screenshot(url: &str, full_page: bool) -> Self {
249 Self {
250 mode: FetchMode::Screenshot { full_page },
251 ..Self::new(url)
252 }
253 }
254
255 pub fn javascript(url: &str, expression: impl Into<String>) -> Self {
257 Self {
258 mode: FetchMode::JavaScript(expression.into()),
259 ..Self::new(url)
260 }
261 }
262
263 pub fn timeout(mut self, timeout: Duration) -> Self {
265 self.timeout = Some(timeout);
266 self
267 }
268
269 pub fn settle(mut self, settle: Duration) -> Self {
271 self.settle = Some(settle);
272 self
273 }
274
275 pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
277 self.user_agent = Some(sanitize_user_agent(ua.into()));
278 self
279 }
280
281 pub fn schema(mut self, schema: crate::schema::ExtractSchema) -> Self {
283 self.extract_schema = Some(schema);
284 self
285 }
286
287 pub fn visibility(mut self, policy: crate::visibility::VisibilityPolicy) -> Self {
289 self.visibility = Some(policy);
290 self
291 }
292
293 pub fn cookies(mut self, cookies: Vec<crate::cookies::CookieSpec>) -> Self {
295 self.cookies = cookies;
296 self
297 }
298
299 pub(crate) fn effective_timeout(&self) -> Duration {
301 self.timeout.unwrap_or(Self::DEFAULT_TIMEOUT)
302 }
303
304 pub(crate) fn effective_settle(&self) -> Duration {
306 self.settle.unwrap_or(Self::DEFAULT_SETTLE)
307 }
308
309 pub(crate) fn effective_visibility(&self) -> crate::visibility::VisibilityPolicy {
311 self.visibility.unwrap_or_default()
312 }
313}
314
315pub fn fetch_blocking(opts: &FetchOptions) -> crate::error::Result<Page> {
317 if let Some(pdf_page) = pre_fetch(opts)? {
318 return Ok(pdf_page);
319 }
320 let bridge_opts = build_bridge_options(opts);
321 let servo_page = crate::bridge::fetch_page(bridge_opts).map_err(|e| map_engine_error(e, opts))?;
322 Ok(finalize_page(servo_page, opts))
323}
324
325pub async fn fetch(opts: &FetchOptions) -> crate::error::Result<Page> {
327 if let Some(pdf_page) = pre_fetch_async(opts).await? {
328 return Ok(pdf_page);
329 }
330 let bridge_opts = build_bridge_options(opts);
331 let servo_page = crate::bridge::fetch_page_async(bridge_opts)
332 .await
333 .map_err(|e| map_engine_error(e, opts))?;
334 Ok(finalize_page(servo_page, opts))
335}
336
337pub fn markdown_blocking(url: &str) -> crate::error::Result<String> {
339 fetch_blocking(&FetchOptions::new(url))?.markdown_with_url(url)
340}
341
342pub async fn markdown(url: &str) -> crate::error::Result<String> {
344 fetch(&FetchOptions::new(url)).await?.markdown_with_url(url)
345}
346
347pub fn extract_json_blocking(url: &str) -> crate::error::Result<String> {
349 fetch_blocking(&FetchOptions::new(url))?.extract_json_with_url(url)
350}
351
352pub async fn extract_json(url: &str) -> crate::error::Result<String> {
354 fetch(&FetchOptions::new(url)).await?.extract_json_with_url(url)
355}
356
357pub fn text_blocking(url: &str) -> crate::error::Result<String> {
359 Ok(fetch_blocking(&FetchOptions::new(url))?.inner_text)
360}
361
362pub async fn text(url: &str) -> crate::error::Result<String> {
364 Ok(fetch(&FetchOptions::new(url)).await?.inner_text)
365}
366
367fn pre_fetch(opts: &FetchOptions) -> crate::error::Result<Option<Page>> {
368 crate::net::ensure_crypto_provider();
369 crate::net::validate_url(&opts.url)?;
370
371 if matches!(opts.mode, FetchMode::Content)
372 && let Some(bytes) = crate::pdf::probe(&opts.url, opts.effective_timeout().as_secs().max(1))
373 {
374 return Ok(Some(pdf_page(&bytes)));
375 }
376
377 Ok(None)
378}
379
380async fn pre_fetch_async(opts: &FetchOptions) -> crate::error::Result<Option<Page>> {
381 crate::net::ensure_crypto_provider();
382 crate::net::validate_url(&opts.url)?;
383
384 if matches!(opts.mode, FetchMode::Content) {
385 let url = opts.url.clone();
386 let timeout_secs = opts.effective_timeout().as_secs().max(1);
387 let probe = tokio::task::spawn_blocking(move || crate::pdf::probe(&url, timeout_secs))
388 .await
389 .map_err(|e| Error::engine(anyhow::anyhow!("pdf probe task panicked: {e}"), Some(opts.url.clone())))?;
390 if let Some(bytes) = probe {
391 return Ok(Some(pdf_page(&bytes)));
392 }
393 }
394
395 Ok(None)
396}
397
398fn pdf_page(bytes: &[u8]) -> Page {
399 let text = crate::extract::extract_pdf(bytes);
400 Page {
401 html: String::new(),
402 inner_text: text,
403 ..Page::default()
404 }
405}
406
407fn build_bridge_options(opts: &FetchOptions) -> crate::bridge::FetchOptions<'_> {
408 crate::bridge::FetchOptions {
409 url: &opts.url,
410 timeout_secs: opts.effective_timeout().as_secs().max(1),
411 settle_ms: u64::try_from(opts.effective_settle().as_millis()).unwrap_or(u64::MAX),
412 user_agent: opts.user_agent.as_deref(),
413 cookies: &opts.cookies,
414 mode: match opts.mode {
415 FetchMode::Content => crate::bridge::FetchMode::Content { include_a11y: false },
416 FetchMode::Screenshot { full_page } => crate::bridge::FetchMode::Screenshot { full_page },
417 FetchMode::JavaScript(ref expr) => crate::bridge::FetchMode::ExecuteJs {
418 expression: expr.clone(),
419 },
420 },
421 }
422}
423
424fn finalize_page(servo_page: crate::bridge::ServoPage, opts: &FetchOptions) -> Page {
425 let mut page = Page::from_servo(servo_page);
426 page.visibility_policy = opts.effective_visibility();
427 if let Some(schema) = opts.extract_schema.as_ref() {
428 page.extracted = Some(schema.extract_from(&page.html));
429 }
430 page
431}
432
433fn map_engine_error(e: crate::bridge::EngineError, opts: &FetchOptions) -> Error {
434 match e {
435 crate::bridge::EngineError::Timeout(_) => Error::Timeout {
436 url: opts.url.clone(),
437 timeout: opts.effective_timeout(),
438 },
439 crate::bridge::EngineError::Other(e) => Error::engine(e, Some(opts.url.clone())),
440 }
441}
442
#[cfg(test)]
mod tests {
    use super::*;

    // ---- FetchOptions: constructors, setters and effective values ----

    // A fresh options value stores nothing but the URL.
    #[test]
    fn fetch_options_defaults() {
        let opts = FetchOptions::new("https://example.com");
        assert_eq!(opts.url, "https://example.com");
        assert_eq!(opts.timeout, None);
        assert_eq!(opts.settle, None);
        assert_eq!(opts.visibility, None);
        assert!(matches!(opts.mode, FetchMode::Content));
    }

    // Unset values resolve to the documented defaults.
    #[test]
    fn fetch_options_effective_defaults() {
        let opts = FetchOptions::new("https://example.com");
        assert_eq!(opts.effective_timeout(), Duration::from_secs(30));
        assert_eq!(opts.effective_settle(), Duration::ZERO);
    }

    // Caller-supplied values win over the defaults.
    #[test]
    fn fetch_options_caller_value_preserved() {
        let opts = FetchOptions::new("https://example.com")
            .timeout(Duration::from_secs(45))
            .settle(Duration::from_millis(250));
        assert_eq!(opts.timeout, Some(Duration::from_secs(45)));
        assert_eq!(opts.settle, Some(Duration::from_millis(250)));
        assert_eq!(opts.effective_timeout(), Duration::from_secs(45));
        assert_eq!(opts.effective_settle(), Duration::from_millis(250));
    }

    #[test]
    fn fetch_options_screenshot() {
        let opts = FetchOptions::screenshot("https://example.com", true);
        assert!(matches!(opts.mode, FetchMode::Screenshot { full_page: true }));
    }

    #[test]
    fn fetch_options_javascript() {
        let opts = FetchOptions::javascript("https://example.com", "document.title");
        assert!(matches!(opts.mode, FetchMode::JavaScript(ref e) if e == "document.title"));
    }

    #[test]
    fn fetch_options_chaining() {
        let opts = FetchOptions::new("https://example.com")
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(500));
        assert_eq!(opts.timeout, Some(Duration::from_secs(60)));
        assert_eq!(opts.settle, Some(Duration::from_millis(500)));
    }

    // ---- User-agent handling ----

    #[test]
    fn fetch_user_agent_set() {
        let opts = FetchOptions::new("https://example.com").user_agent("MyBot/1.0");
        assert_eq!(opts.user_agent.as_deref(), Some("MyBot/1.0"));
    }

    #[test]
    fn fetch_user_agent_default_is_none() {
        let opts = FetchOptions::new("https://example.com");
        assert!(opts.user_agent.is_none());
    }

    // CR/LF must not survive, otherwise the value could inject headers.
    #[test]
    fn fetch_user_agent_sanitizes_crlf() {
        let opts = FetchOptions::new("https://example.com").user_agent("Bot\r\nX-Evil: yes");
        assert_eq!(opts.user_agent.as_deref(), Some("Bot X-Evil: yes"));
    }

    #[test]
    fn fetch_user_agent_sanitizes_null() {
        let opts = FetchOptions::new("https://example.com").user_agent("Bot\0/1.0");
        assert_eq!(opts.user_agent.as_deref(), Some("Bot /1.0"));
    }

    // An explicit empty user agent is kept as-is rather than dropped.
    #[test]
    fn fetch_user_agent_empty_string() {
        let opts = FetchOptions::new("https://example.com").user_agent("");
        assert_eq!(opts.user_agent.as_deref(), Some(""));
    }

    // ---- Page extraction helpers ----

    #[test]
    fn page_markdown_from_html() {
        let page = Page {
            html: "<html><head><title>Test</title></head><body><p>hello world</p></body></html>".into(),
            inner_text: "hello world".into(),
            ..Page::default()
        };
        let md = page.markdown().unwrap();
        assert!(md.contains("hello world"));
    }

    #[test]
    fn page_extract_json_produces_valid_json() {
        let page = Page {
            html: "<html><head><title>Test</title></head><body><p>content</p></body></html>".into(),
            inner_text: "content".into(),
            ..Page::default()
        };
        let json = page.extract_json().unwrap();
        let _: Value = serde_json::from_str(&json).expect("valid JSON");
    }

    #[test]
    fn page_screenshot_png_none_by_default() {
        let page = Page::default();
        assert!(page.screenshot_png().is_none());
    }

    #[test]
    fn page_markdown_with_selector_scopes_to_subtree() {
        let page = Page {
            html: "<html><body><article>keep</article><aside>drop</aside></body></html>".into(),
            ..Page::default()
        };
        let md = page.markdown_with_selector("https://example.com", "article").unwrap();
        assert!(md.contains("keep"));
        assert!(!md.contains("drop"));
    }

    #[test]
    fn page_extract_json_with_selector_includes_url() {
        let page = Page {
            html: "<html><body><article>scoped</article></body></html>".into(),
            ..Page::default()
        };
        let json = page
            .extract_json_with_selector("https://example.com/page", "article")
            .unwrap();
        let parsed: Value = serde_json::from_str(&json).expect("valid JSON");
        assert_eq!(parsed["url"].as_str(), Some("https://example.com/page"));
        assert!(parsed["text_content"].as_str().unwrap().contains("scoped"));
    }

    #[test]
    fn page_markdown_with_selector_no_match_returns_empty() {
        let page = Page {
            html: "<html><body><article>x</article></body></html>".into(),
            ..Page::default()
        };
        let md = page.markdown_with_selector("", ".nonexistent").unwrap();
        assert!(md.is_empty());
    }

    #[test]
    fn page_markdown_with_invalid_selector_returns_error() {
        let page = Page {
            html: "<html><body><p>x</p></body></html>".into(),
            ..Page::default()
        };
        let err = page.markdown_with_selector("", "###invalid[[[").unwrap_err();
        assert!(err.to_string().contains("invalid CSS selector"));
    }

    #[test]
    fn page_markdown_with_empty_selector_returns_error() {
        let page = Page {
            html: "<html><body><p>x</p></body></html>".into(),
            ..Page::default()
        };
        assert!(page.markdown_with_selector("", "").is_err());
    }

    // ---- URL validation: these fail before any network or engine work ----

    #[test]
    fn fetch_rejects_invalid_url() {
        let result = fetch_blocking(&FetchOptions::new("not a url"));
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }));
    }

    #[test]
    fn fetch_rejects_private_ip() {
        let result = fetch_blocking(&FetchOptions::new("http://127.0.0.1/"));
        assert!(result.is_err());
    }

    #[test]
    fn fetch_rejects_file_scheme() {
        let result = fetch_blocking(&FetchOptions::new("file:///etc/passwd"));
        assert!(result.is_err());
    }

    // Conversion from the engine's `ServoPage` into the public `Page`.
    mod page_from_servo {
        use crate::bridge;
        use crate::fetch::{ConsoleLevel, Page};

        // Solid red image of the given size; the content does not matter,
        // only that it can be encoded.
        fn synthetic_image(w: u32, h: u32) -> image::RgbaImage {
            image::RgbaImage::from_pixel(w, h, image::Rgba([255, 0, 0, 255]))
        }

        fn empty_servo_page() -> bridge::ServoPage {
            bridge::ServoPage::default()
        }

        #[test]
        fn extracts_title_from_html() {
            let mut sp = empty_servo_page();
            sp.html = "<html><head><title>Hello World</title></head></html>".into();
            let page = Page::from_servo(sp);
            assert_eq!(page.title.as_deref(), Some("Hello World"));
        }

        #[test]
        fn title_is_none_when_tag_missing() {
            let mut sp = empty_servo_page();
            sp.html = "<html><body>no title here</body></html>".into();
            let page = Page::from_servo(sp);
            assert!(page.title.is_none());
        }

        #[test]
        fn title_is_none_when_tag_empty() {
            let mut sp = empty_servo_page();
            sp.html = "<html><head><title></title></head></html>".into();
            let page = Page::from_servo(sp);
            assert!(page.title.is_none());
        }

        #[test]
        fn title_is_none_for_empty_html() {
            let page = Page::from_servo(empty_servo_page());
            assert!(page.title.is_none());
        }

        #[test]
        fn inner_text_none_becomes_empty_string() {
            let sp = empty_servo_page();
            assert!(sp.inner_text.is_none());
            let page = Page::from_servo(sp);
            assert_eq!(page.inner_text, "");
        }

        #[test]
        fn screenshot_is_encoded_as_png() {
            let mut sp = empty_servo_page();
            sp.screenshot = Some(synthetic_image(8, 8));
            let page = Page::from_servo(sp);
            let bytes = page.screenshot_png().expect("screenshot encoded");
            assert_eq!(&bytes[..8], b"\x89PNG\r\n\x1a\n", "PNG magic bytes");
        }

        #[test]
        fn console_messages_empty_by_default() {
            let page = Page::from_servo(empty_servo_page());
            assert!(page.console_messages.is_empty());
        }

        #[test]
        fn console_messages_preserve_all_six_levels() {
            let cases = [
                (bridge::ConsoleLevel::Log, ConsoleLevel::Log),
                (bridge::ConsoleLevel::Debug, ConsoleLevel::Debug),
                (bridge::ConsoleLevel::Info, ConsoleLevel::Info),
                (bridge::ConsoleLevel::Warn, ConsoleLevel::Warn),
                (bridge::ConsoleLevel::Error, ConsoleLevel::Error),
                (bridge::ConsoleLevel::Trace, ConsoleLevel::Trace),
            ];
            for (src, expected) in cases {
                let mut sp = empty_servo_page();
                sp.console_messages = vec![bridge::ConsoleMessage {
                    level: src,
                    message: "msg".into(),
                }];
                let page = Page::from_servo(sp);
                assert_eq!(
                    page.console_messages.len(),
                    1,
                    "console message lost for source level {src:?}",
                );
                assert_eq!(
                    page.console_messages[0].level, expected,
                    "level mapping wrong for source {src:?}",
                );
            }
        }

        #[test]
        fn console_messages_preserve_ordering_across_levels() {
            let mut sp = empty_servo_page();
            sp.console_messages = vec![
                bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Info,
                    message: "first".into(),
                },
                bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Error,
                    message: "second".into(),
                },
                bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Warn,
                    message: "third".into(),
                },
            ];
            let page = Page::from_servo(sp);
            assert_eq!(page.console_messages.len(), 3);
            assert_eq!(page.console_messages[0].message, "first");
            assert_eq!(page.console_messages[1].message, "second");
            assert_eq!(page.console_messages[2].message, "third");
            assert_eq!(page.console_messages[0].level, ConsoleLevel::Info);
            assert_eq!(page.console_messages[1].level, ConsoleLevel::Error);
            assert_eq!(page.console_messages[2].level, ConsoleLevel::Warn);
        }

        #[test]
        fn extracted_starts_as_none_until_schema_applied() {
            let page = Page::from_servo(empty_servo_page());
            assert!(page.extracted.is_none());
        }

        #[test]
        fn full_round_trip_preserves_every_field() {
            let sp = bridge::ServoPage {
                html: "<html><head><title>T</title></head><body>B</body></html>".into(),
                inner_text: Some("B".into()),
                layout_json: Some("[]".into()),
                // Deliberately different from `layout_json` so that swapping
                // the two fields in `from_servo` cannot go unnoticed.
                visibility_json: Some("[0]".into()),
                screenshot: Some(synthetic_image(2, 2)),
                js_result: Some("42".into()),
                accessibility_tree: Some("{}".into()),
                a11y: None,
                console_messages: vec![bridge::ConsoleMessage {
                    level: bridge::ConsoleLevel::Log,
                    message: "x".into(),
                }],
            };
            let page = Page::from_servo(sp);
            assert_eq!(page.html, "<html><head><title>T</title></head><body>B</body></html>");
            assert_eq!(page.inner_text, "B");
            assert_eq!(page.title.as_deref(), Some("T"));
            assert_eq!(page.layout_json.as_deref(), Some("[]"));
            assert_eq!(page.visibility_json.as_deref(), Some("[0]"));
            assert_eq!(page.js_result.as_deref(), Some("42"));
            assert_eq!(page.accessibility_tree.as_deref(), Some("{}"));
            assert_eq!(page.console_messages.len(), 1);
            assert_eq!(page.console_messages[0].level, ConsoleLevel::Log);
            assert_eq!(page.console_messages[0].message, "x");
            assert!(page.screenshot_png().is_some());
            assert!(page.a11y.is_none());
            assert!(page.extracted.is_none());
        }
    }
}